Example #1
def verify_resource_params(resource: dict, warnings_as_exceptions: bool = False) -> dict:
    """
    Perform simplistic validation of resource.py parameters.
    Raises TypeError or NameError on problems.
    """
    global resource_renamed_parameters
    for old_key, new_key in resource_renamed_parameters:
        if old_key in resource:
            resource[new_key] = resource[old_key]

            if not warnings_as_exceptions:
                log.warning("Resource parameter {} was renamed to {}".format(old_key, new_key))
            else:
                raise DeprecationWarning("Resource parameter {} was renamed to {}".format(old_key, new_key))

    # @todo check string templates for deprecated variables
    global resource_parameters_types
    for variable, (m_type, nullable, must) in resource_parameters_types.items():
        if (must is True) and (variable not in resource):
            raise NameError("Syntax error in " + resource['name'] + "\nVariable %s is not set" % (variable,))
        if variable not in resource:
            continue
        if resource[variable] is None and not nullable:
            raise TypeError("Syntax error in " + resource['name'] + "\nVariable %s can not be None" % (variable,))
        if not isinstance(resource[variable], m_type) and not (resource[variable] is None and nullable):
            raise TypeError("Syntax error in " + resource['name'] +
                            "\nVariable %s should be %s" % (variable, str(m_type)) +
                            ". But it is " + str(type(resource[variable])))
    # level 2 parameters
    # check parameters for presence and type
    # format: key,type,can be None,must have parameter
    parameters_types_2 = {
        'remote_access_node': [str, resource['batch_scheduler'].lower() == "openstack", True]
    }

    for variable, (m_type, nullable, must) in parameters_types_2.items():
        if (must is True) and (variable not in resource):
            raise NameError("Syntax error in " + resource['name'] + "\nVariable %s is not set" % (variable,))
        if variable not in resource:
            continue
        if resource[variable] is None and not nullable:
            raise TypeError("Syntax error in " + resource['name'] + "\nVariable %s can not be None" % (variable,))
        if not isinstance(resource[variable], m_type) and not (resource[variable] is None and nullable):
            raise TypeError("Syntax error in " + resource['name'] +
                            "\nVariable %s should be %s" % (variable, str(m_type)) +
                            ". But it is " + str(type(resource[variable])))

    # mapped parameters which still use a different name internally
    # these should eventually be renamed
    resource_renamed_parameters_internal_name = [
    ]

    for old_key, new_key in resource_renamed_parameters_internal_name:
        if old_key in resource:
            resource[new_key] = resource[old_key]

    return resource
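The checks above assume module-level tables of the form key -> (type, can-be-None, must-be-present). A minimal, self-contained sketch of that convention (the parameter names and values below are illustrative, not the actual AKRR defaults):

parameters_types = {
    # key: (expected type, can be None, must be present)
    'name': (str, False, True),
    'ppn': (int, False, True),
    'remote_access_node': (str, True, False),
}

resource = {'name': 'cluster1', 'ppn': 32, 'remote_access_node': None}

for variable, (m_type, nullable, must) in parameters_types.items():
    if must and variable not in resource:
        raise NameError("Variable %s is not set" % variable)
    if variable not in resource:
        continue
    value = resource[variable]
    if value is None and not nullable:
        raise TypeError("Variable %s can not be None" % variable)
    if value is not None and not isinstance(value, m_type):
        raise TypeError("Variable %s should be %s, but it is %s" % (variable, m_type, type(value)))

print("resource parameters look OK")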
Example #2
def verify_app_params(app: dict, app_on_resource: dict, warnings_as_exceptions: bool = False) -> dict:
    """
    Perform simplistic validation of app.py parameters.

    Raises NameError, TypeError or DeprecationWarning on problems.
    """
    # mapped renamed parameters
    global app_renamed_parameters
    for old_key, new_key in app_renamed_parameters:
        if old_key in app_on_resource:
            app[new_key] = app_on_resource[old_key]
            app_on_resource[new_key] = app_on_resource[old_key]
            if not warnings_as_exceptions:
                log.warning("App parameter %s was renamed to %s", old_key, new_key)
            else:
                raise DeprecationWarning("App parameter {} was renamed to {}".format(old_key, new_key))
        if old_key in app:
            app[new_key] = app[old_key]
            app_on_resource[new_key] = app[old_key]
            if not warnings_as_exceptions:
                log.warning("App parameter %s was renamed to %s", old_key, new_key)
            else:
                raise DeprecationWarning("App parameter {} was renamed to {}".format(old_key, new_key))

    # check parameters for presence and type
    # format: key,type,can be None,must have parameter
    parameters_types = [
        ['parser', str, False, True],
        ['executable', str, True, True],
        ['input_param', str, True, True],
        ['walltime_limit', int, False, True],
        ['run_script', dict, False, False]
    ]

    for variable, m_type, nullable, must in parameters_types:
        if must and (variable not in app):
            raise NameError("Syntax error in " + app['name'] + "\nVariable %s is not set" % (variable,))
        if variable not in app:
            continue
        if app[variable] is None and not nullable:
            raise TypeError("Syntax error in " + app['name'] + "\nVariable %s can not be None" % (variable,))
        if not isinstance(app[variable], m_type) and not (app[variable] is None and nullable):
            raise TypeError("Syntax error in " + app['name'] +
                            "\nVariable %s should be %s" % (variable, str(m_type)) +
                            ". But it is " + str(type(app[variable])))

    # mapped parameters which still use a different name internally
    # these should eventually be renamed
    renamed_parameters_internal_name = [
    ]

    for old_key, new_key in renamed_parameters_internal_name:
        if old_key in app:
            app_on_resource[old_key] = app_on_resource[new_key]
    return app_on_resource
Example #3
def enable_resource_for_execution(resource):
    """populate mod_appkernel database and allow execution of jobs on this resource"""
    if akrr.dry_run:
        return
    resource_name = resource['name']
    try:
        con_ak, cur_ak = akrr.db.get_ak_db(True)

        cur_ak.execute('''SELECT * FROM resource WHERE nickname=%s''',
                       (resource_name, ))
        resource_in_ak_db = cur_ak.fetchall()
        if len(resource_in_ak_db) == 0:
            log.warning(
                "There is no record of %s in mod_appkernel.resource will add one.",
                resource_name)
            cur_ak.execute(
                '''INSERT INTO resource (resource,nickname,description,enabled,visible)
                        VALUES(%s,%s,%s,0,0);''',
                (resource['name'], resource['name'], resource['info']))
            con_ak.commit()

            cur_ak.execute('''SELECT * FROM resource WHERE nickname=%s''',
                           (resource_name, ))
            resource_in_ak_db = cur_ak.fetchall()
        resource_in_ak_db = resource_in_ak_db[0]
        # enable and make visible
        cur_ak.execute(
            '''UPDATE resource
                        SET enabled=1,visible=1
                        WHERE resource_id=%s;''',
            (resource_in_ak_db['resource_id'], ))
        con_ak.commit()
        log.info(
            "Enabled %s in mod_appkernel.resource for tasks execution and made it visible to XDMoD UI.",
            resource_name)
    except MySQLdb.Error:
        log.error("Can not connect to AK DB\n" "Probably invalid credential")

    # enabling resource for execution
    try:
        r = akrrrestclient.put('/resources/' + resource_name + '/on')
        if r.status_code == 200:
            log.info('Successfully enabled ' + resource_name)
        else:
            log.error(
                "Can not enable resource through AKRR REST API ( %s )\nSee server response below\n%s",
                akrrrestclient.restapi_host, json.dumps(r.json(), indent=4))
    except requests.RequestException:
        log.error(
            "Can not enable resource through AKRR REST API ( %s )\n"
            "Is it still running?\n", akrrrestclient.restapi_host)
Example #4
def check_if_test_job_already_submitted(resource, app_name="test"):
    """check if the test job is already submitted, return task id if it is submitted"""
    task_id = None
    test_job_lock_filename = get_test_job_lock_filename(resource, app_name)
    if os.path.isfile(test_job_lock_filename):
        fin = open(test_job_lock_filename, "r")
        task_id = int(fin.readline())
        fin.close()

        r = akrrrestclient.get('/tasks/' + str(task_id))
        if r.status_code != 200:
            task_id = None
        else:
            log.warning_count += 1
            log.warning(
                "\nWARNING %d: Seems this is rerun of this script, will monitor task with task_id = %d ",
                log.warning_count, task_id)
            log.warning("To submit new task delete %s\n",
                        test_job_lock_filename)

        # check how old it is
    return task_id
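For reference, the lock file this helper reads is just a text file whose first line is the task id. A sketch of writing and re-reading it (the path below is made up; the real one comes from get_test_job_lock_filename):

import os

test_job_lock_filename = "/tmp/cluster1_test_test_task.dat"  # hypothetical path

# after submitting a test task its id is stored in the lock file
task_id = 12345
with open(test_job_lock_filename, "w") as fout:
    print(task_id, file=fout)

# a rerun of the script finds the file and monitors the same task
with open(test_job_lock_filename, "r") as fin:
    previous_task_id = int(fin.readline())
print("previously submitted task:", previous_task_id)

os.remove(test_job_lock_filename)  # delete the lock file to force submission of a new task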
Example #5
def get_daemon_pid(akrr_pid_file, delete_pid_file_if_daemon_down=False):
    """
    Return the PID of AKRR server
    """
    import os
    import psutil
    pid = None
    if os.path.isfile(akrr_pid_file):
        fin = open(akrr_pid_file, "r")
        lines = fin.readlines()
        pid = int(lines[0])
        fin.close()

        # Check For the existence of a unix pid
        if psutil.pid_exists(pid):
            try:
                cmd = " ".join(psutil.Process(pid=pid).cmdline())

                if cmd.count('akrr') and cmd.count('daemon') and cmd.count(
                        'start'):
                    return pid
            except Exception as e:
                log.log_traceback(str(e))
        else:
            # if we get here, the previous session crashed
            if delete_pid_file_if_daemon_down:
                log.warning(
                    "WARNING:File %s exists meaning that the previous execution was finished incorrectly."
                    "Removing pid file." % akrr_pid_file)
                os.remove(akrr_pid_file)
                return None
            else:
                raise IOError(
                    "File %s exists meaning that the previous execution was finished incorrectly."
                    % akrr_pid_file)

    return pid
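A self-contained sketch of the same pid-file check, using the current process as a stand-in for the daemon (assumes psutil is installed; the file location is made up):

import os
import psutil

akrr_pid_file = "/tmp/akrr_demo.pid"  # hypothetical location

# pretend the daemon wrote its pid on startup
with open(akrr_pid_file, "w") as fout:
    fout.write("%d\n" % os.getpid())

with open(akrr_pid_file, "r") as fin:
    pid = int(fin.readline())

if psutil.pid_exists(pid):
    # inspect the command line the same way get_daemon_pid does
    cmd = " ".join(psutil.Process(pid=pid).cmdline())
    print("process %d is alive, cmdline: %s" % (pid, cmd))
else:
    print("stale pid file, process %d is gone" % pid)

os.remove(akrr_pid_file)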
Example #6
def check_create_dirs(rsh, resource):
    log.info("Checking directory locations\n")

    d = resource['akrr_data']
    log.info("Checking: %s:%s", resource['remote_access_node'], d)
    status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
    log.info(msg)

    d = resource['appkernel_dir']
    log.info("Checking: %s:%s", resource['remote_access_node'], d)
    status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
    log.info(msg)

    d = resource['network_scratch']
    log.info("Checking: %s:%s", resource['remote_access_node'], d)
    status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=True)

    if status is True:
        log.info(msg)
    else:
        log.warning_count += 1
        log.warning(msg)
        log.warning(
            "WARNING %d: network scratch might be have a different location on head node, "
            "so if it is by design it is ok", log.warning_count)

    d = resource['local_scratch']
    log.info("Checking: %s:%s", resource['remote_access_node'], d)
    status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=False)
    if status is True:
        log.info(msg)
    else:
        log.warning_count += 1
        log.warning(msg)
        log.warning(
            "WARNING %d: local scratch might be have a different location on head node, "
            "so if it is by design it is ok", log.warning_count)

    log.empty_line()
Example #7
def load_app_on_resource(app_name: str,
                         resource_name: str,
                         resource: Dict,
                         app: Dict,
                         app_on_resource_cfg_filename: str = None,
                         validate: bool = True) -> Dict:
    """
    load app configuration for the given resource, do minimalistic validation
    return dict with app parameters

    raises an error if the configuration can not be loaded
    """
    log.debug("Loading app %s", app_name)
    from akrr.util import exec_files_to_dict
    try:
        # load resource specific parameters
        if app_on_resource_cfg_filename is None:
            app_on_resource_cfg_filename = os.path.join(
                cfg_dir, "resources", resource_name, app_name + ".app.conf")
        if not os.path.isfile(app_on_resource_cfg_filename):
            # raise error because a specific app on resource was asked
            if app['need_resource_specific_conf']:
                raise AkrrError(
                    "application kernel configuration file do not exists (%s)!"
                    % app_on_resource_cfg_filename)
            else:
                return {}

        # init default
        app_on_resource = copy.deepcopy(
            app['appkernel_on_resource']['default'])
        if 'name' not in app_on_resource:
            app_on_resource['name'] = app_name
        if 'nickname' not in app_on_resource:
            app_on_resource['nickname'] = app_name + ".@nnodes@"

        # set execution_method from resource config
        execution_method = resource.get("execution_method", "hpc")

        # set execution_method from app on resource config

        execution_method = _get_app_execution_method(
            app_on_resource_cfg_filename, default=execution_method)

        # read default config
        app_on_resource_cfg_default = os.path.join(
            default_dir, "%s.%s.app.conf" % (app_name, execution_method))

        if os.path.isfile(app_on_resource_cfg_default):
            app_on_resource = exec_files_to_dict(app_on_resource_cfg_default,
                                                 var_in=app_on_resource)
        elif execution_method != "hpc":
            log.warning("%s doen't have default for %s execution method" %
                        (app_name, execution_method))

        # read resource specific configuration
        app_on_resource[
            'resource_specific_app_cfg_filename'] = app_on_resource_cfg_filename
        app_on_resource['resource_specific_app_cfg_file_last_mod_time'] = 0
        if os.path.isfile(app_on_resource_cfg_filename):
            app_on_resource = exec_files_to_dict(app_on_resource_cfg_filename,
                                                 var_in=app_on_resource)
            app_on_resource['resource_specific_app_cfg_file_last_mod_time'] = \
                os.path.getmtime(app_on_resource_cfg_filename)

        # validate the combined config
        if validate:
            app_combined = {}
            app_combined.update(resource)
            app_combined.update(app)
            app_combined.update(app_on_resource)
            app_on_resource = verify_app_params(app_combined, app_on_resource)

        return app_on_resource
    except Exception:
        log.exception(
            "Exception occurred during app kernel configuration loading for %s."
            % app_name)
        raise AkrrError("Can not load app configuration for %s." % app_name)
Example #8
def resource_add(config):
    """add resource, config should have following members
        dry_run - Dry Run No files will actually be created
        minimalistic - Minimize questions number, configuration files will be edited manually
        no-ping - do not run ping to test headnode name
        verbose
    """
    global verbose
    global no_ping
    global minimalistic
    global resource_name
    global remote_access_node
    global remote_access_method
    global remote_copy_method
    global ssh_username
    global ssh_password
    global ssh_private_key_file
    global ssh_private_key_password
    global network_scratch
    global local_scratch
    global akrr_data
    global appkernel_dir
    global batch_scheduler
    global batch_job_header_template

    if config.verbose:
        verbose = True

    log.info("Beginning Initiation of New Resource...")
    verbose = config.verbose
    akrr.dry_run = config.dry_run
    no_ping = config.no_ping
    minimalistic = config.minimalistic

    log.info("Retrieving Resources from XDMoD Database...")
    # RETRIEVE: the resources from XDMoD
    resources = retrieve_resources_from_xdmod()
    log.info("Found following resources from XDMoD Database:\n" +
             "    resource_id  name\n" + "\n".join([
                 "    %11d  %-40s" % (resource_id, resource_name)
                 for resource_name, resource_id in resources
             ]) + "\n")

    if len(resources) > 0:
        while True:
            log.log_input(
                'Enter resource_id for import (enter 0 for no match):')
            resource_id = input()
            if validate_resource_id(resource_id, resources):
                break
            log.warning("Incorrect resource_id try again")
        log.empty_line()
        resource_id = int(resource_id)
    else:
        resource_id = 0

    if resource_id <= 0:  # i.e. no match from XDMoD DB
        resource_id = None

    resource_name = ""
    while True:
        if resource_id is None:
            log.log_input('Enter AKRR resource name:')
            resource_name = input()
        else:
            resource_name2 = get_resource_name_by_id(resource_id, resources)
            log.log_input(
                'Enter AKRR resource name, hit enter to use same name as in XDMoD Database [%s]:'
                % (resource_name2, ))
            resource_name = input()
            if resource_name.strip() == "":
                resource_name = resource_name2

        if validate_resource_name(resource_name):
            break
    log.empty_line()

    while True:
        log.log_input(
            'Enter queuing system on resource (slurm, pbs or openstack): ')
        queuing_system = input()
        if validate_queuing_system(queuing_system):
            break
        else:
            log.error("Incorrect queuing_system try again")

    batch_scheduler = queuing_system
    log.empty_line()

    if minimalistic is False:
        get_remote_access_method()
        get_system_characteristics()
        get_file_system_access_points()

    log.debug(
        "Summary of parameters:\n" + "resource_name: {}\n".format(resource_name) +
        "remote_access_node: {}\n".format(remote_access_node) +
        "remote_access_method: {}\n".format(remote_access_method) +
        "remote_copy_method: {}\n".format(remote_copy_method) +
        "ssh_username: {}\n".format(ssh_username) +
        "ssh_password: {}\n".format(ssh_password) +
        "ssh_private_key_file: {}\n".format(ssh_private_key_file) +
        "ssh_private_key_password: {}\n".format(ssh_private_key_password) +
        "network_scratch: {}\n".format(network_scratch) +
        "local_scratch: {}\n".format(local_scratch) +
        "akrr_data: {}\n".format(akrr_data) +
        "appkernel_dir: {}\n".format(appkernel_dir) +
        "batch_scheduler: {}\n".format(batch_scheduler) +
        "batch_job_header_template: {}".format(batch_job_header_template) +
        "\n")

    generate_resource_config(resource_id, resource_name, queuing_system)
    log.info("Initiation of new resource is completed.\n"
             "    Edit batch_job_header_template variable in {}\n"
             "    and move to resource validation and deployment step.\n"
             "    i.e. execute:\n"
             "        akrr resource deploy -r {}".format(
                 resource_cfg_filename, resource_name))
Example #9
def get_file_system_access_points():
    global resource_name
    global network_scratch
    global local_scratch
    global akrr_data
    global appkernel_dir

    home_dir = akrr.util.ssh.ssh_command(rsh, "echo $HOME").strip()
    scratch_network_dir = akrr.util.ssh.ssh_command(rsh,
                                                    "echo $SCRATCH").strip()

    # local_scratch
    local_scratch_default = "/tmp"
    while True:
        log.log_input(
            "Enter location of local scratch (visible only to single node):")
        local_scratch = input("[%s]" % local_scratch_default)
        if local_scratch.strip() == "":
            local_scratch = local_scratch_default
        status, msg = check_dir_simple(rsh, local_scratch)
        if status:
            log.info(msg)
            log.empty_line()
            break
        else:
            log.warning(msg)
            log.warning(
                'local scratch might have a different location on the head node, so if it is by design it is ok'
            )
            log.empty_line()
            break
    local_scratch = akrr.util.ssh.ssh_command(rsh, "echo %s" %
                                              (local_scratch, )).strip()
    # network_scratch
    network_scratch_default = ""
    if scratch_network_dir != "":
        network_scratch_default = scratch_network_dir
    network_scratch_visible = False
    while True:
        log.log_input(
            "Enter location of network scratch (visible only to all nodes),"
            "used for temporary storage of app kernel input/output:")
        if network_scratch_default != "":
            network_scratch = input("[%s]" % network_scratch_default)
            if network_scratch.strip() == "":
                network_scratch = network_scratch_default
        else:
            network_scratch = input("")

        if network_scratch == "":
            log.error("Incorrect value for network_scratch, try again")
            continue

        status, msg = check_dir(rsh,
                                network_scratch,
                                exit_on_fail=False,
                                try_to_create=True)
        if status:
            log.info(msg)
            network_scratch_visible = True
            log.empty_line()
            break
        else:
            log.warning(msg)
            break
    network_scratch = akrr.util.ssh.ssh_command(
        rsh, "echo %s" % (network_scratch, )).strip()
    # appkernel_dir
    appker_dir_default = os.path.join(home_dir, "appker", resource_name)
    while True:
        log.log_input(
            "Enter future location of app kernels input and executable files:")
        appkernel_dir = input("[%s]" % appker_dir_default)
        if appkernel_dir.strip() == "":
            appkernel_dir = appker_dir_default
        status, msg = check_dir(rsh,
                                appkernel_dir,
                                exit_on_fail=False,
                                try_to_create=True)
        if status:
            log.info(msg)
            log.empty_line()
            break
        else:
            log.error(msg)
    appkernel_dir = akrr.util.ssh.ssh_command(rsh, "echo %s" %
                                              (appkernel_dir, )).strip()
    # akrr_data
    akrr_data_default = os.path.join(home_dir, "akrr_data", resource_name)
    if network_scratch_visible:
        akrr_data_default = os.path.join(network_scratch, "akrr_data",
                                         resource_name)
    while True:
        log.log_input(
            "Enter future locations for app kernels working directories (can or even should be on scratch space):"
        )
        akrr_data = input("[%s]" % akrr_data_default)
        if akrr_data.strip() == "":
            akrr_data = akrr_data_default
        status, msg = check_dir(rsh,
                                akrr_data,
                                exit_on_fail=False,
                                try_to_create=True)
        if status:
            log.info(msg)
            log.empty_line()
            break
        else:
            log.error(msg)
    akrr_data = akrr.util.ssh.ssh_command(rsh,
                                          "echo %s" % (akrr_data, )).strip()
Example #10
    def generate_batch_job_script(self):
        if self.JobScriptName is None:
            self.JobScriptName = self.get_job_script_name()

        # get walltime from DB
        db_defaults = {}
        try:
            db, cur = akrr.db.get_akrr_db()

            cur.execute('''SELECT resource,app,resource_param,app_param FROM active_tasks
            WHERE task_id=%s ;''', (self.task_id,))
            raw = cur.fetchall()
            if len(raw) > 0:
                (resource, app, resource_param, app_param) = raw[0]

                cur.execute("""SELECT walltime_limit
                    FROM akrr_default_walllimit
                    WHERE resource=%s AND app=%s AND resource_param=%s AND app_param=%s """,
                            (resource, app, resource_param, app_param))
                raw = cur.fetchall()

                if len(raw) > 0:
                    db_defaults['walltime_limit'] = raw[0][0]

            # db.commit()
            cur.close()
            del db
        except Exception as e:
            raise e
        # create job-script
        try:
            batch_vars = {}
            appkernel_on_resource = {}
            if 'appkernel_on_resource' in self.app:
                if self.resourceName in self.app['appkernel_on_resource']:
                    appkernel_on_resource = self.app['appkernel_on_resource'][self.resourceName]
                elif 'default' in self.app['appkernel_on_resource']:
                    appkernel_on_resource = self.app['appkernel_on_resource']['default']

            for di in [self.resource, self.app, appkernel_on_resource, db_defaults, self.resourceParam, self.appParam]:
                batch_vars.update(di)

            # get auto-walltime limit
            try:
                if 'auto_walltime_limit' in batch_vars and batch_vars['auto_walltime_limit'] is True:
                    log.info("auto_walltime_limit is on, trying to estimate walltime limit...")
                    auto_walltime_limit_overhead = 1.2
                    if 'auto_walltime_limit_overhead' in batch_vars:
                        auto_walltime_limit_overhead = batch_vars['auto_walltime_limit_overhead'] + 1.0
                    # query last 20 executions of this appkernel on that resource with that node count

                    db, cur = akrr.db.get_akrr_db(True)

                    cur.execute('''SELECT resource,reporter,reporternickname,collected,status,walltime FROM akrr_xdmod_instanceinfo
                        WHERE  `resource`=%s AND `reporternickname` =  %s
                        ORDER BY  `akrr_xdmod_instanceinfo`.`collected` DESC 
                        LIMIT 0 , 20''', (self.resource['name'], "%s.%d" % (self.app['name'], batch_vars['nnodes'])))

                    raw = cur.fetchall()

                    i = 0
                    last_five_runs_successfull = True
                    max_walltime = 0.0
                    for r in raw:
                        if i < 5 and r['status'] == 0:
                            last_five_runs_successfull = False
                        if r['status'] == 1 and r['walltime'] > max_walltime:
                            max_walltime = r['walltime']
                        i += 1
                    if i < 5:
                        log.info("There are only %d previous run, need at least 5 for walltime limit autoset" % i)
                    else:
                        if not last_five_runs_successfull:
                            log.warning("One of last 5 runs have failed. Would not use autoset.")
                        else:
                            if max_walltime < 120:
                                log.info("Previous walltime was less than 2 minutes, will set walltime limit to 2 minutes")
                                max_walltime = 120
                                batch_vars['walltime_limit'] = 2
                            else:
                                log.info(
                                    "Max walltime was %.1f s, will change walltime limit from %.1f minutes to %d minutes" %
                                    (max_walltime, batch_vars['walltime_limit'],
                                    int(auto_walltime_limit_overhead * max_walltime / 60.0 + 0.99)))
                                batch_vars['walltime_limit'] = int((auto_walltime_limit_overhead * max_walltime / 60.0 + 0.99))
                    cur.close()
                    del db
            except Exception as e:
                log.error("Exception happened in AkrrTaskHandlerAppKer.generate_batch_job_script: %s" % str(e))

            # calculate NNodes and NCores
            if 'nnodes' in batch_vars:
                tmp_num_nodes = batch_vars['nnodes']
                tmp_num_cores = tmp_num_nodes * batch_vars['ppn']
            else:
                tmp_num_cores = batch_vars['ncores']
                if tmp_num_cores % batch_vars['ppn'] == 0:
                    tmp_num_nodes = tmp_num_cores // batch_vars['ppn']
                else:
                    tmp_num_nodes = (tmp_num_cores // batch_vars['ppn']) + 1

            assert isinstance(tmp_num_nodes, int)
            assert isinstance(tmp_num_cores, int)

            batch_vars['akrr_num_of_cores'] = tmp_num_cores
            batch_vars['akrr_num_of_nodes'] = tmp_num_nodes

            # Set batch_vars remaps
            batch_vars['akrr_ppn'] = batch_vars['ppn']
            batch_vars['akrrNCoresToBorder'] = batch_vars['akrr_ppn'] * batch_vars['akrr_num_of_nodes']
            batch_vars['akrr_task_work_dir'] = self.remoteTaskDir
            batch_vars['akrr_walltime_limit'] = "%02d:%02d:00" % (
                int(batch_vars['walltime_limit']) // 60, int(batch_vars['walltime_limit']) % 60)
            batch_vars['akrr_appkernel_name'] = self.app['name']
            batch_vars['akrr_resource_name'] = self.resource['name']
            batch_vars['akrr_time_stamp'] = self.timeStamp
            if batch_vars['akrr_num_of_nodes'] == 1:
                batch_vars['akrrPPN4NodesOrCores4OneNode'] = batch_vars['akrr_num_of_cores']
            else:
                batch_vars['akrrPPN4NodesOrCores4OneNode'] = batch_vars['akrr_ppn']

            if 'node_list_setter_template' not in batch_vars:
                batch_vars['node_list_setter_template'] = batch_vars['node_list_setter'][batch_vars['batch_scheduler']]

            # process templates
            batch_vars['akrrCommonCommands'] = akrr.util.format_recursively(
                batch_vars['akrr_common_commands_template'], batch_vars, keep_double_brackets=True)

            batch_vars['akrrCommonCleanup'] = akrr.util.format_recursively(
                batch_vars['akrr_common_cleanup_template'], batch_vars, keep_double_brackets=True)

            # specially for IOR request two nodes for single node benchmark, one for read and one for write
            if batch_vars['appkernel_requests_two_nodes_for_one'] is True and batch_vars['akrr_num_of_nodes'] == 1 and \
                    'batch_job_header_template' in batch_vars:
                batch_vars2 = copy.deepcopy(batch_vars)
                batch_vars2['akrr_num_of_cores'] = 2 * batch_vars['akrr_num_of_cores']
                batch_vars2['akrr_num_of_nodes'] = 2 * batch_vars['akrr_num_of_nodes']
                batch_vars2['akrrNCoresToBorder'] = 2 * batch_vars['akrrNCoresToBorder']
                batch_vars2['akrrPPN4NodesOrCores4OneNode'] = batch_vars['akrr_ppn']
                batch_vars['batch_job_header_template'] = akrr.util.format_recursively(
                    batch_vars2['batch_job_header_template'], batch_vars2)

            # do parameters adjustment
            if 'process_params' in batch_vars:
                batch_vars['process_params'](batch_vars)

            # generate job script
            job_script = akrr.util.format_recursively(self.resource["batch_job_template"], batch_vars)
            job_script_full_path = os.path.join(self.taskDir, "jobfiles", self.JobScriptName)
            fout = open(job_script_full_path, "w")
            fout.write(job_script)
            fout.close()
        except Exception as e:
            self.status = "ERROR: Can not created batch job script"
            self.status_info = traceback.format_exc()
            akrr.util.log.log_traceback(self.status)
            raise e
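The auto-walltime logic above reduces to one formula: take the longest successful walltime among the recent runs (in seconds), multiply by the overhead factor and round up to whole minutes. A worked sketch with made-up numbers:

# longest successful walltime among the last runs, in seconds (made-up value)
max_walltime = 540.0
auto_walltime_limit_overhead = 1.2  # i.e. 20% on top of the observed time

walltime_limit = int(auto_walltime_limit_overhead * max_walltime / 60.0 + 0.99)
print(walltime_limit)  # 11 (minutes)

# the HH:MM:00 string handed to the batch job header template
print("%02d:%02d:00" % (walltime_limit // 60, walltime_limit % 60))  # 00:11:00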
Example #11
    def install_cron_scripts(self):
        """
        Install cron scripts.
        """
        log.info("Installing cron entries")
        if akrr.dry_run:
            return

        if self.cron_email:
            mail = "MAILTO = " + self.cron_email
        else:
            mail = None
        restart = '50 23 * * * bash -l -c "' + _akrr_bin_dir + '/akrr daemon restart -cron"'
        check_and_restart = '33 * * * * bash -l -c "' + _akrr_bin_dir + '/akrr daemon checknrestart -cron"'
        archive = '43 1 * * * bash -l -c "' + _akrr_bin_dir + '/akrr archive -cron"'

        try:
            crontab_content = subprocess.check_output("crontab -l", shell=True)
            crontab_content = crontab_content.decode("utf-8").splitlines(True)
        except Exception:
            log.info("Crontab does not have user's crontab yet")
            crontab_content = []

        mail_updated = False
        mail_there = False
        restart_there = False
        check_and_restart_there = False
        archive_there = False

        for i in range(len(crontab_content)):
            tmpstr = crontab_content[i]
            if len(tmpstr.strip()) > 1 and tmpstr.strip()[0] != "#":
                m = re.match(r'^MAILTO\s*=\s*(.*)', tmpstr.strip())
                if m:
                    cron_email = m.group(1)
                    cron_email = cron_email.replace('"', '')
                    mail_there = True
                    if self.cron_email != cron_email:
                        if mail:
                            crontab_content[i] = mail
                        else:
                            crontab_content[i] = "#" + crontab_content[i]
                        mail_updated = True
                if tmpstr.count("akrr") and tmpstr.count(
                        "daemon") and tmpstr.count("restart") > 0:
                    restart_there = True
                if tmpstr.count("akrr") and tmpstr.count(
                        "daemon") and tmpstr.count("checknrestart") > 0:
                    check_and_restart_there = True
                if tmpstr.count("akrr") and tmpstr.count(
                        "daemon") and tmpstr.count("archive") > 0:
                    archive_there = True
        if mail_updated:
            log.info("Cron's MAILTO was updated")
        if ((self.cron_email is not None and mail_there) or (
                self.cron_email is None and mail_there is False)) and restart_there and check_and_restart_there \
                and mail_updated is False:
            log.warning(
                "All AKRR crond entries found. No modifications necessary.")
            return
        if self.cron_email is not None and mail_there is False:
            crontab_content.insert(0, mail + "\n")
        if restart_there is False:
            crontab_content.append(restart + "\n")
        if check_and_restart_there is False:
            crontab_content.append(check_and_restart + "\n")
        if archive_there is False:
            crontab_content.append(archive + "\n")

        tmp_cronfile_fd, tmp_cronfile = mkstemp(prefix="crontmp",
                                                dir=os.path.expanduser('~'),
                                                text=True)
        if not akrr.dry_run:
            with open(tmp_cronfile_fd, 'wt') as f:
                for tmp_str in crontab_content:
                    f.write(tmp_str)
            subprocess.call("crontab " + tmp_cronfile, shell=True)
            os.remove(tmp_cronfile)
            log.info("Crontab updated.")
        else:
            log.dry_run("For removing old AKRR should update crontab to:\n" +
                        "".join(crontab_content))
Example #12
def app_validate(resource, appkernel, nnodes):
    from akrr.util.log import verbose
    resource_name = resource
    app_name = appkernel

    error_count = 0
    warning_count = 0

    log.info("Validating " + app_name +
             " application kernel installation on " + resource_name)

    from akrr import get_akrr_dirs

    akrr_dirs = get_akrr_dirs()

    default_resource_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], "default.resource.conf"))
    resource_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['cfg_dir'], "resources", resource_name,
                     "resource.conf"))

    default_app_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], "default.app.conf"))
    app_ker_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], app_name + ".app.conf"))
    ###############################################################################################
    # validating resource parameter file

    log.info("#" * 80)
    log.info("Validating %s parameters from %s" %
             (resource_name, resource_param_filename))

    if not os.path.isfile(resource_param_filename):
        log.error("resource parameters file (%s) do not exists!" %
                  (resource_param_filename, ))
        exit(1)

    # check syntax
    try:
        tmp = {}
        exec(
            compile(
                open(default_resource_param_filename).read(),
                default_resource_param_filename, 'exec'), tmp)
        exec(
            compile(
                open(resource_param_filename).read(), resource_param_filename,
                'exec'), tmp)
    except Exception:
        log.exception("Can not load resource from "
                      "" + resource_param_filename + "\n" +
                      "Probably invalid syntax.")
        exit(1)
    # check syntax
    try:
        tmp = {}
        exec(
            compile(
                open(default_app_param_filename).read(),
                default_app_param_filename, 'exec'), tmp)
        exec(
            compile(
                open(app_ker_param_filename).read(), app_ker_param_filename,
                'exec'), tmp)
    except Exception:
        log.exception("Can not load application kernel from "
                      "" + app_ker_param_filename + "\n" +
                      "Probably invalid syntax")
        exit(1)

    # now we can load akrr
    from akrr import cfg
    from akrr import akrrrestclient
    from akrr.cli.resource_deploy import make_results_summary
    from akrr.cfg_util import load_app_default, load_app_on_resource

    resource = cfg.find_resource_by_name(resource_name)
    log.info(
        "Syntax of %s is correct and all necessary parameters are present." %
        resource_param_filename)

    cfg.find_app_by_name(app_name)
    try:
        app_default = load_app_default(app_name)
        app = load_app_on_resource(app_name, resource_name, resource,
                                   app_default)

        pprint.pprint(app)
    except Exception as e:  # pylint: disable=broad-except
        log.exception("Exception occurred during updated app loading:" +
                      str(e))
        exit(1)
    log.info(
        "Syntax of %s is correct and all necessary parameters are present." %
        app_ker_param_filename)

    # check if AK is in DB
    if True:
        # add entry to mod_appkernel.resource
        db_ak, cur_ak = akrr.db.get_ak_db(True)

        cur_ak.execute(
            '''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''',
            (app_name, ))
        ak_in_akdb = cur_ak.fetchall()
        if len(ak_in_akdb) == 0:
            cur_ak.execute(
                '''INSERT INTO app_kernel_def (name,ak_base_name,processor_unit,enabled, description, visible)
                        VALUES(%s,%s,'node',0,%s,0);''',
                (app_name, app_name, app_name))
            db_ak.commit()
        cur_ak.execute(
            '''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''',
            (app_name, ))
        ak_in_akdb = cur_ak.fetchall()[0]
        # add entry to mod_akrr.resource
        db, cur = akrr.db.get_akrr_db(True)

        cur.execute('''SELECT * FROM app_kernels WHERE name=%s''',
                    (app_name, ))
        ak_in_db = cur.fetchall()
        if len(ak_in_db) == 0:
            cur.execute(
                '''INSERT INTO app_kernels (id,name,enabled,nodes_list)
                        VALUES(%s,%s,0,'1,2,4,8');''',
                (ak_in_akdb['ak_def_id'], app_name))
            db.commit()

    ###############################################################################################
    # connect to resource
    log.info("#" * 80)
    log.info("Validating resource accessibility. Connecting to %s." %
             (resource['name']))
    if resource['ssh_private_key_file'] is not None and os.path.isfile(
            resource['ssh_private_key_file']) is False:
        log.error("Can not access ssh private key (%s)"
                  "" % (resource['ssh_private_key_file'], ))
        exit(1)

    str_io = io.StringIO()
    try:
        sys.stdout = sys.stderr = str_io
        # Connect to resource
        # Spin-up instance before ssh it
        if resource['batch_scheduler'].lower() == "openstack":
            # Start instance if it is cloud
            openstack_server = akrr.util.openstack.OpenStackServer(
                resource=resource)
            resource['openstack_server'] = openstack_server
            openstack_server.create()
            resource['remote_access_node'] = openstack_server.ip
        if resource['batch_scheduler'].lower() == "googlecloud":
            # Start instance if it is cloud
            googlecloud_server = akrr.util.googlecloud.GoogleCloudServer(
                resource=resource)
            resource['googlecloud_server'] = googlecloud_server
            googlecloud_server.create()
            resource['remote_access_node'] = googlecloud_server.ip

        rsh = akrr.util.ssh.ssh_resource(resource)

        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
    except Exception as e:
        msg2 = str_io.getvalue()
        msg2 += "\n" + traceback.format_exc()
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
        msg = "Can not connect to """ + resource['name'] + "\n" + \
              "Probably invalid credential, see full error report below\n" + msg2
        log.error(msg)
        raise e
    print("=" * 80)
    log.info("Successfully connected to %s\n\n" % (resource['name']))

    ###############################################################################################
    log.info("Checking directory locations\n")

    d = resource['akrr_data']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
    log.info(msg + "\n")

    d = resource['appkernel_dir']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
    log.info(msg + "\n")

    d = resource['network_scratch']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=False)
    if status is True:
        log.info(msg)
    else:
        log.warning(msg)
        log.warning(
            ("WARNING %d: network scratch might be have a different location "
             + "on head node, so if it is by design it is ok") %
            (warning_count + 1))
        warning_count += 1
    log.info("")

    d = resource['local_scratch']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=False)
    if status is True:
        log.info(msg)
    else:
        log.warning(msg)
        log.warning(
            ("WARNING %d: local scratch might be have a different location " +
             "on head node, so if it is by design it is ok") %
            (warning_count + 1))
        warning_count += 1
    log.info("")

    # close connection we don't need it any more
    rsh.close(force=True)
    del rsh

    # Delete openstack instance after tests
    if resource['batch_scheduler'].lower() == "openstack":
        # delete instance if it is cloud
        resource['openstack_server'].delete()
        resource['remote_access_node'] = None
    if resource['batch_scheduler'].lower() == "googlecloud":
        # delete instance if it is cloud
        resource['googlecloud_server'].delete()
        resource['remote_access_node'] = None

    ###############################################################################################
    # send test job to queue

    log.info("#" * 80)
    log.info(
        "Will send test job to queue, wait till it executed and will analyze the output"
    )

    print("Will use AKRR REST API at", akrrrestclient.restapi_host)
    # get check connection
    try:
        r = akrrrestclient.get('/scheduled_tasks')
        if r.status_code != 200:
            log.error(
                "Can not get token for AKRR REST API ( "
                "" + akrrrestclient.restapi_host + " )\n" +
                "See server response below:\n %s",
                json.dumps(r.json(), indent=4))
            exit(1)
    except Exception:
        log.error("Can not connect to AKRR REST API ( "
                  "" + akrrrestclient.restapi_host + " )\n" +
                  "Is it running?\n" + "See full error report below:\n" +
                  traceback.format_exc())
        exit(1)

    # check if the test job is already submitted
    task_id = None
    test_job_lock_filename = os.path.join(
        cfg.data_dir, resource_name + "_" + app_name + "_test_task.dat")
    if os.path.isfile(test_job_lock_filename):
        fin = open(test_job_lock_filename, "r")
        task_id = int(fin.readline())
        fin.close()

        r = akrrrestclient.get('/tasks/' + str(task_id))
        if r.status_code != 200:
            task_id = None
        else:
            log.warning(
                "\nWARNING %d: Seems this is rerun of this script, will monitor task with task_id = "
                % (warning_count + 1) + str(task_id))
            log.warning("To submit new task delete " + test_job_lock_filename +
                        "\n")
            warning_count += 1
        # check how old is it
    # submit test job
    if task_id is None:
        try:
            payload = {
                'resource': resource_name,
                'app': app_name,
                'resource_param': "{'nnodes':%d}" % nnodes,
                'task_param': "{'test_run':True}"
            }
            r = akrrrestclient.post('/scheduled_tasks', data=payload)
            if r.status_code != 200:
                log.error(
                    "Can not submit task through AKRR REST API ( "
                    "" + akrrrestclient.restapi_host +
                    " )\n" + "See server response below",
                    json.dumps(r.json(), indent=4))
                exit(1)
            task_id = r.json()['data']['data']['task_id']
        except Exception:
            log.error("Can not submit task through AKRR REST API ( "
                      "" + akrrrestclient.restapi_host + " )\n" +
                      "Is it still running?\n" +
                      "See full error report below:\n" +
                      traceback.format_exc())
            exit(1)
        # write file with task_id
        fout = open(os.path.join(test_job_lock_filename), "w")
        print(task_id, file=fout)
        fout.close()
        log.info("\nSubmitted test job to AKRR, task_id is " + str(task_id) +
                 "\n")
    # now wait till job is done
    msg_body0 = ""
    while True:
        t = datetime.datetime.now()
        # try:
        r = akrrrestclient.get('/tasks/' + str(task_id))

        if r.status_code == 200:
            response_json = r.json()

            msg_body = "=" * 80
            msg_body += "\nTast status:\n"

            if response_json["data"]["queue"] == "scheduled_tasks":
                msg_body += "Task is in scheduled_tasks queue.\n"
                msg_body += "It schedule to be started on " + response_json[
                    "data"]["data"]['time_to_start'] + "\n"
            elif response_json["data"]["queue"] == "active_tasks":
                msg_body += "Task is in active_tasks queue.\n"
                msg_body += "Status: " + str(
                    response_json["data"]["data"]['status']) + "\n"
                msg_body += "Status info:\n" + str(
                    response_json["data"]["data"]['status_info']) + "\n"
            elif response_json["data"]["queue"] == "completed_tasks":
                msg_body += "Task is completed!\n"
                completed_tasks = r.json()['data']['data']['completed_tasks']
                akrr_xdmod_instanceinfo = r.json(
                )['data']['data']['akrr_xdmod_instanceinfo']
                if verbose:
                    msg_body += "completed_tasks table entry:\n" + pp.pformat(
                        completed_tasks) + "\n"
                    msg_body += "akrr_xdmod_instanceinfo table entry:\n" + pp.pformat(
                        akrr_xdmod_instanceinfo) + "\n"
                    msg_body += 'output parsing results:\n' + akrr_xdmod_instanceinfo[
                        'body'] + "\n"
                else:
                    msg_body += "\tstatus: " + str(
                        akrr_xdmod_instanceinfo['status']) + "\n"
                    if akrr_xdmod_instanceinfo['status'] == 0:
                        msg_body += "\tstatus2: " + completed_tasks[
                            'status'] + "\n"
                    msg_body += "\tstatus_info: " + completed_tasks[
                        'status_info'] + "\n"
            else:
                msg_body += r.text + "\n"

            tail_msg = "time: " + t.strftime("%Y-%m-%d %H:%M:%S")

            if msg_body != msg_body0:
                print("\n\n" + msg_body)
                print(tail_msg, end=' ')
                sys.stdout.flush()
            else:
                print("\r" + tail_msg, end=' ')
                sys.stdout.flush()

            msg_body0 = copy.deepcopy(msg_body)

            if response_json["data"]["queue"] == "completed_tasks":
                break
        # try to update:
        try:
            payload = {'next_check_time': ''}
            akrrrestclient.put('/active_tasks/' + str(task_id), data=payload)
        except Exception:
            pass
        time.sleep(5)
    ###############################################################################################
    # analysing the output
    log.info("Test job is completed analyzing output\n")
    r = akrrrestclient.get('/tasks/' + str(task_id))
    if r.status_code != 200:
        log.error(
            "Can not get information about task\n" +
            "See full error report below", "AKRR server response:\n" + r.text)
        exit(1)
    completed_tasks = r.json()['data']['data']['completed_tasks']
    akrr_xdmod_instanceinfo = r.json(
    )['data']['data']['akrr_xdmod_instanceinfo']
    akrr_errmsg = r.json()['data']['data']['akrr_errmsg']

    results_summary = make_results_summary(resource_name, app_name,
                                           completed_tasks,
                                           akrr_xdmod_instanceinfo,
                                           akrr_errmsg)
    # execution was not successful
    if completed_tasks['status'].count("ERROR") > 0:
        if completed_tasks['status'].count(
                "ERROR Can not created batch job script and submit it to remote queue"
        ) > 0:
            log.error(
                "Can not created batch job script and/or submit it to remote queue\n"
                + "See full error report below:\n" + results_summary)
            os.remove(test_job_lock_filename)
            exit(1)
        else:
            log.error(completed_tasks['status'] + "\n" +
                      "See full error report below:\n" + results_summary)
            os.remove(test_job_lock_filename)
            exit(1)

    # execution was not successful
    if akrr_xdmod_instanceinfo['status'] == 0:
        log.error("Task execution was not successful\n" +
                  "See full error report below:\n" + results_summary)
        os.remove(test_job_lock_filename)
        exit(1)
    # see what is in report
    elm_perf = XMLElementTree.fromstring(akrr_xdmod_instanceinfo['body'])
    elm_perf.find('benchmark').find('parameters')
    elm_perf.find('benchmark').find('statistics')

    log.info("\nTest kernel execution summary:")
    print(results_summary)
    print()
    # log.info("\nThe output looks good.\n")
    if error_count == 0:
        # enabling resource for execution
        log.info("\nEnabling %s on %s for execution\n" %
                 (app_name, resource_name))
        try:
            result = akrrrestclient.put('/resources/%s/on' % (resource_name, ),
                                        data={'application': app_name})
            if result.status_code == 200:
                log.info("Successfully enabled %s on %s" %
                         (app_name, resource_name))
            else:
                if result is not None:
                    log.error(
                        "Can not turn-on %s on %s" % (app_name, resource_name),
                        result.text)
                else:
                    log.error("Can not turn-on %s on %s" %
                              (app_name, resource_name))
                exit(1)
            if True:
                # add entry to mod_appkernel.resource
                db_ak, cur_ak = akrr.db.get_ak_db(True)

                cur_ak.execute(
                    '''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''',
                    (app_name, ))
                ak_in_akdb = cur_ak.fetchall()
                if len(ak_in_akdb) == 0:
                    cur_ak.execute(
                        "INSERT INTO app_kernel_def (name,ak_base_name,processor_unit,enabled, description, visible)"
                        "VALUES(%s,%s,'node',0,%s,0);",
                        (app_name, app_name, app_name))
                    db_ak.commit()
                cur_ak.execute(
                    '''UPDATE app_kernel_def SET enabled=1,visible=1  WHERE ak_base_name=%s''',
                    (app_name, ))
                db_ak.commit()
                # add entry to mod_akrr.resource
                db, cur = akrr.db.get_akrr_db(True)

                cur.execute('''SELECT * FROM app_kernels WHERE name=%s''',
                            (app_name, ))
                ak_in_db = cur.fetchall()
                if len(ak_in_db) == 0:
                    cur.execute(
                        '''INSERT INTO app_kernels (id,name,enabled,nodes_list)
                                VALUES(%s,%s,0,'1,2,4,8');''',
                        (ak_in_akdb['ak_def_id'], app_name))
                    db.commit()
                cur.execute(
                    '''UPDATE app_kernels SET enabled=1  WHERE name=%s''',
                    (app_name, ))
                db.commit()
        except Exception:
            log.exception("Can not turn-on %s on %s", app_name, resource_name)
            exit(1)

    if error_count > 0:
        log.error("There are %d errors, fix them.", error_count)
    if warning_count > 0:
        log.warning(
            "\nThere are %d warnings.\nif warnings have sense (highlighted in yellow), you can move to next step!\n"
            % warning_count)
    if error_count == 0 and warning_count == 0:
        log.info("\nDONE, you can move to next step!\n")
    os.remove(test_job_lock_filename)
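A minimal sketch of the test-task submission step above using the plain requests library; the host is a placeholder and the token handling that akrrrestclient normally performs is omitted:

import json
import requests

restapi_host = "https://localhost:8091/api/v1"  # placeholder for akrrrestclient.restapi_host

payload = {
    'resource': 'cluster1',            # resource_name
    'app': 'test',                     # app_name
    'resource_param': "{'nnodes':2}",
    'task_param': "{'test_run':True}",
}

r = requests.post(restapi_host + '/scheduled_tasks', data=payload)
if r.status_code == 200:
    print("submitted, task_id =", r.json()['data']['data']['task_id'])
else:
    print("submission failed:\n" + json.dumps(r.json(), indent=4))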
Example #13
def _remove_log_dir():
    """remove mod_appkernel"""
    if cfg.akrr_log_dir is None:
        log.warning("akrr_log_dir is None")
        return
    _remove_dir(cfg.akrr_log_dir)
Example #14
def run_it(_):
    log.warning("add_command_install is not implemented")
Example #15
def run_it(_):
    log.warning("add_command_build is not implemented")
Example #16
def copy_exec_sources_and_inputs(rsh, resource):
    """Copy exec sources and inputs to remote resource"""
    log.info(
        "Preparing to copy application signature calculator,\n"
        "    app. kernel input files and \n"
        "    HPCC, IMB, IOR and Graph500 source code to remote resource\n")

    try:
        akrr.util.ssh.ssh_command(rsh, "cd %s" % resource['appkernel_dir'])
        out = akrr.util.ssh.ssh_command(rsh, "ls " + resource['appkernel_dir'])
        files_in_appker_dir = out.strip().split()

        if not ("inputs" in files_in_appker_dir
                or "inputs/" in files_in_appker_dir):
            log.info("Copying app. kernel input tarball to %s",
                     resource['appkernel_dir'])
            if not akrr.dry_run:
                akrr.util.ssh.scp_to_resource(
                    resource, cfg.appker_repo_dir + "/inputs.tar.gz",
                    resource['appkernel_dir'])

            log.info("Unpacking app. kernel input files to %s/inputs",
                     resource['appkernel_dir'])
            if not akrr.dry_run:
                out = akrr.util.ssh.ssh_command(
                    rsh,
                    "tar xvfz %s/inputs.tar.gz" % resource['appkernel_dir'])
                log.debug(out)

                out = akrr.util.ssh.ssh_command(
                    rsh, "du -h %s/inputs" % resource['appkernel_dir'])
                log.debug(out)

                if out.count("No such file or directory") == 0:
                    log.info("App. kernel input files are in %s/inputs\n",
                             resource['appkernel_dir'])
                else:
                    raise Exception("files are not copied!")
        else:
            log.warning_count += 1
            log.warning(
                "WARNING %d: App. kernel inputs directory %s/inputs is present, assume they are correct.\n",
                log.warning_count, resource['appkernel_dir'])

        if not ("execs" in files_in_appker_dir
                or "execs/" in files_in_appker_dir):
            log.info(
                "Copying app. kernel execs tarball to %s\n" %
                (resource['appkernel_dir']) +
                "It contains HPCC,IMB,IOR and Graph500 source code and app.signature calculator"
            )
            if not akrr.dry_run:
                akrr.util.ssh.scp_to_resource(
                    resource, cfg.appker_repo_dir + "/execs.tar.gz",
                    resource['appkernel_dir'])
            log.info(
                "Unpacking HPCC,IMB,IOR and Graph500 source code and app.signature calculator files to %s/execs",
                resource['appkernel_dir'])
            if not akrr.dry_run:
                out = akrr.util.ssh.ssh_command(
                    rsh,
                    "tar xvfz %s/execs.tar.gz" % resource['appkernel_dir'])
                log.debug(out)

                out = akrr.util.ssh.ssh_command(
                    rsh, "df -h %s/execs" % resource['appkernel_dir'])
                log.debug(out)

                if out.count("No such file or directory") == 0:
                    log.info(
                        "HPCC,IMB,IOR and Graph500 source code and app.signature calculator are in %s/execs\n",
                        resource['appkernel_dir'])
                else:
                    raise Exception("files are not copied!")
        else:
            log.warning_count += 1
            log.warning(
                "WARNING %d: App. kernel executables directory %s/execs is present, assume they are correct.",
                log.warning_count, resource['appkernel_dir'])
            log.warning(
                "It should contain HPCC,IMB,IOR and Graph500 source code and app.signature calculator\n"
            )

        akrr.util.ssh.ssh_command(rsh, "rm execs.tar.gz  inputs.tar.gz")
    except Exception as e:
        log.critical("Can not copy files to %s", resource['name'])
        raise e
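
A hedged usage sketch: the function expects a validated resource configuration and an open persistent shell, which is how resource_deploy (Example No. 19 below) drives it. The resource name is a placeholder.

# Usage sketch; validate_resource_parameter_file and connect_to_resource are the
# helpers used elsewhere in this module.
resource = validate_resource_parameter_file("my_cluster")  # placeholder resource name
rsh = connect_to_resource(resource)
try:
    copy_exec_sources_and_inputs(rsh, resource)
finally:
    rsh.close(force=True)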
Example No. 17
def resource_add(config):
    """add resource, config should have following members
        dry_run - Dry Run No files will actually be created
        minimalistic - Minimize questions number, configuration files will be edited manually
        no-ping - do not run ping to test headnode name
        verbose
    """
    global verbose
    global no_ping
    global minimalistic
    global resource_name
    global remote_access_node
    global remote_access_method
    global remote_copy_method
    global ssh_username
    global ssh_password
    global ssh_private_key_file
    global ssh_private_key_password
    global network_scratch
    global local_scratch
    global akrr_data
    global appkernel_dir
    global batch_scheduler
    global batch_job_header_template

    verbose = config.verbose

    log.info("Beginning Initiation of New Resource...")
    akrr.dry_run = config.dry_run
    no_ping = config.no_ping
    minimalistic = config.minimalistic

    log.info("Retrieving Resources from XDMoD Database...")
    # RETRIEVE: the resources from XDMoD
    resources = retrieve_resources_from_xdmod()
    log.info("Found following resources from XDMoD Database:\n" +
             "    resource_id  name\n" + "\n".join([
                 "    %11d  %-40s" % (resource_id, resource_name)
                 for resource_name, resource_id in resources
             ]) + "\n")

    resource_id = None
    if len(resources) > 0:
        while True:
            log.log_input(
                'Enter resource_id for import (enter None for no match):')
            resource_id = input()
            if validate_resource_id(resource_id, resources):
                break
            log.warning("Incorrect resource_id try again")
        log.empty_line()
        if resource_id != "None":
            resource_id = int(resource_id)
        else:
            resource_id = None

    resource_name = ask.ask('Enter AKRR resource name',
                            validate=validate_resource_name,
                            default=None if resource_id is None else
                            get_resource_name_by_id(resource_id, resources))
    batch_scheduler = ask.multiple_choice_enum(
        'Enter queuing system on resource', QueuingSystemType).value

    if minimalistic is False:
        if batch_scheduler == QueuingSystemType.openstack.value:
            _get_openstack_details()
            get_system_characteristics()
        elif batch_scheduler == QueuingSystemType.googlecloud.value:
            _get_googlecloud_details()
            get_system_characteristics()
        else:
            get_remote_access_method()
            get_system_characteristics()
            get_file_system_access_points()

    log.debug(
        "Summary of parameters:\n" +
        "    resource_name: {}\n".format(resource_name) +
        "    remote_access_node: {}\n".format(remote_access_node) +
        "    remote_access_method: {}\n".format(remote_access_method) +
        "    remote_copy_method: {}\n".format(remote_copy_method) +
        "    ssh_username: {}\n".format(ssh_username) +
        "    ssh_password: {}\n".format(ssh_password) +
        "    ssh_private_key_file: {}\n".format(ssh_private_key_file) +
        "    ssh_private_key_password: {}\n".format(ssh_private_key_password) +
        "    network_scratch: {}\n".format(network_scratch) +
        "    local_scratch: {}\n".format(local_scratch) +
        "    akrr_data: {}\n".format(akrr_data) +
        "    appkernel_dir: {}\n".format(appkernel_dir) +
        "    batch_scheduler: {}\n".format(batch_scheduler) +
        "    batch_job_header_template: {}\n".format(batch_job_header_template))

    generate_resource_config(resource_id, resource_name, batch_scheduler)
    log.info("Initiation of new resource is completed.\n"
             "    Edit batch_job_header_template variable in {}\n"
             "    and move to resource validation and deployment step.\n"
             "    i.e. execute:\n"
             "        akrr resource deploy -r {}".format(
                 resource_cfg_filename, resource_name))
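
A hedged sketch of calling resource_add directly: the attribute names mirror how config is read above, and argparse.Namespace is just a convenient stand-in holding placeholder values.

from argparse import Namespace

config = Namespace(
    dry_run=True,        # do not actually write configuration files
    minimalistic=False,  # ask the full set of questions
    no_ping=False,       # do test the head node with ping
    verbose=True,
)
resource_add(config)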
Example No. 18
def check_connection_to_resource():
    """check the connection to remote resource."""
    global remote_access_node
    global remote_access_method
    global remote_copy_method
    global ssh_username
    global ssh_password
    global ssh_password4thisSession
    global ssh_private_key_file
    global ssh_private_key_password

    successfully_connected = False
    passphrase_entrance_count = 0
    authorize_key_count = 0
    while True:
        # Try to connect
        str_io = io.StringIO()
        try:
            sys.stdout = sys.stderr = str_io
            akrr.util.ssh.ssh_access(
                remote_access_node,
                ssh=remote_access_method,
                username=ssh_username,
                password=ssh_password,
                private_key_file=ssh_private_key_file,
                private_key_password=ssh_private_key_password,
                logfile=str_io,
                command='ls')

            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__

            successfully_connected = True
            break
        except Exception:
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
            response = str_io.getvalue()

            log.debug(
                "Attempted to access the resource without a password and failed; resource response below:\n"
                + "=" * 80 + "\n" + response + "\n" + "=" * 80)

            # check if it asking for passphrase
            m = re.search(r"Enter passphrase for key '(.*)':", response)
            if m:
                if passphrase_entrance_count >= 3:
                    ssh_private_key_password = None
                    ssh_private_key_file = None
                    break
                if passphrase_entrance_count > 0:
                    log.error("Incorrect passphrase try again")
                ssh_private_key_file = m.group(1)
                log.log_input("Enter passphrase for key '%s':" %
                              ssh_private_key_file)
                ssh_private_key_password = getpass.getpass("")
                passphrase_entrance_count += 1
                continue
            m2 = re.search(r"[pP]assword:", response)
            if m is None and ssh_private_key_file is not None and m2:
                log.warning(
                    "Cannot log in to the head node. "
                    "Probably the public key for this private key has not been authorized on the head node."
                )
                log.info(
                    "Will try to add the public key to the list of authorized keys on the head node."
                )
                while True:
                    try:
                        authorize_key_count += 1
                        log.log_input(
                            "Enter password for %s@%s (will be used only during this session):"
                            % (ssh_username, remote_access_node))
                        ssh_password4thisSession = getpass.getpass("")
                        log.empty_line()
                        str_io = io.StringIO()
                        sys.stdout = sys.stderr = str_io
                        akrr.util.ssh.ssh_access(
                            remote_access_node,
                            ssh='ssh-copy-id',
                            username=ssh_username,
                            password=ssh_password4thisSession,
                            private_key_file=ssh_private_key_file,
                            private_key_password=None,
                            logfile=str_io,
                            command='')

                        sys.stdout = sys.__stdout__
                        sys.stderr = sys.__stderr__
                        log.info(response)

                        log.info(
                            "Added the public key to the list of authorized keys on the head node; "
                            "will attempt to connect again.")
                        log.empty_line()
                        break
                    except Exception:
                        sys.stdout = sys.__stdout__
                        sys.stderr = sys.__stderr__
                        if verbose:
                            log.debug(
                                "Attempted to add the public key to the list of authorized keys on the head node "
                                "and failed; resource response below:\n" +
                                "=" * 80 + "\n" + str_io.getvalue() + "\n" + "=" * 80)
                        log.error("Incorrect password, try again.")
                        if authorize_key_count >= 3:
                            break
                if authorize_key_count < 3:
                    continue
            break
    return successfully_connected
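
The function captures the SSH dialog by reassigning sys.stdout/sys.stderr and must restore them in every branch. Below is a small sketch of the same capture done with the standard library context managers, which restore the streams unconditionally even when the call raises; the helper name and shape are illustrative only.

import io
from contextlib import redirect_stdout, redirect_stderr


def capture_output(func, *args, **kwargs):
    # Run func while capturing anything it prints; return (result, captured_text, exception).
    buf = io.StringIO()
    try:
        with redirect_stdout(buf), redirect_stderr(buf):
            result = func(*args, **kwargs)
        return result, buf.getvalue(), None
    except Exception as e:  # broad on purpose, mirroring the handling above
        return None, buf.getvalue(), e

Wrapped around akrr.util.ssh.ssh_access, this removes the need to reassign sys.__stdout__ and sys.__stderr__ in every except branch.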
Example No. 19
def resource_deploy(args):
    global checking_frequency

    resource_name = args.resource

    if 'dry_run' in args:
        akrr.dry_run = args.dry_run
    else:
        akrr.dry_run = False

    if "checking_frequency" in args:
        checking_frequency = args.checking_frequency

    if "appkernel" in args:
        app_name = args.appkernel
    else:
        app_name = "test"

    if "nodes" in args:
        nodes = int(args.nodes)
    else:
        nodes = 2

    log.error_count = 0
    log.warning_count = 0

    # validate resource configuration and get config
    resource = validate_resource_parameter_file(resource_name)

    # connect to resource
    if resource['batch_scheduler'].lower() == "openstack":
        # Start instance if it is cloud
        openstack_server = akrr.util.openstack.OpenStackServer(
            resource=resource)
        resource['openstack_server'] = openstack_server
        openstack_server.create()
        resource['remote_access_node'] = openstack_server.ip
    rsh = connect_to_resource(resource)

    # do tests
    check_shell(rsh, resource)
    check_create_dirs(rsh, resource)

    # deploy inputs and sources
    copy_exec_sources_and_inputs(rsh, resource)

    # check that app.signature calculator on headnode
    check_appsig(rsh, resource)

    # close connection we don't need it any more
    rsh.close(force=True)
    del rsh
    if resource['batch_scheduler'].lower() == "openstack":
        # delete instance if it is cloud
        akrr.util.openstack.OpenStackServer(resource=resource)
        resource['openstack_server'].delete()
        resource['remote_access_node'] = None

    # run test job to queue
    run_test_job(resource, app_name, nodes)

    if resource['batch_scheduler'].lower() == "openstack":
        # Start instance if it is cloud
        openstack_server = akrr.util.openstack.OpenStackServer(
            resource=resource)
        resource['openstack_server'] = openstack_server
        openstack_server.create()
        resource['remote_access_node'] = openstack_server.ip

    if log.error_count == 0:
        append_to_bashrc(resource)
        enable_resource_for_execution(resource)

    if resource['batch_scheduler'].lower() == "openstack":
        # delete instance if it is cloud
        akrr.util.openstack.OpenStackServer(resource=resource)
        resource['openstack_server'].delete()
        resource['remote_access_node'] = None

    log.empty_line()

    log.info("Result:")
    if log.error_count > 0:
        log.error("There are %d errors, fix them.", log.error_count)

    if log.warning_count > 0:
        log.warning(
            "There are %d warnings.\nIf the warnings make sense, you can move to the next step!\n",
            log.warning_count)
    if log.error_count == 0 and log.warning_count == 0:
        log.info("\nDONE, you can move to the next step!\n")
Example No. 20
def get_remote_access_method():
    global resource_name
    global remote_access_node
    global remote_access_method
    global remote_copy_method
    global ssh_username
    global ssh_password
    global ssh_password4thisSession
    global ssh_private_key_file
    global ssh_private_key_password
    global rsh
    global no_ping

    # set remote_access_node
    while True:
        log.log_input(
            "Enter Resource head node (access node) full name (e.g. headnode.somewhere.org):"
        )
        remote_access_node = input("[%s] " % resource_name)
        if remote_access_node.strip() == "":
            remote_access_node = resource_name

        response = os.system("ping -c 1 -w2 " + remote_access_node +
                             " > /dev/null 2>&1")

        if response == 0:
            break
        else:
            if no_ping:
                log.warning("Cannot ping %s, but was asked to ignore it.",
                            remote_access_node)
                break
            log.error("Incorrect head node name (cannot ping %s), try again",
                      remote_access_node)

    # set ssh_username
    current_user = getpass.getuser()
    ask_for_user_name = True

    while True:
        if ask_for_user_name:
            log.log_input("Enter username for resource access:")
            ssh_username = input("[%s] " % current_user)
            if ssh_username.strip() == "":
                ssh_username = current_user
            current_user = ssh_username

        # check password-less access
        if ssh_password is None:
            log.info("Checking for password-less access")
        else:
            log.info("Checking for resource access")
        successfully_connected = check_connection_to_resource()

        if successfully_connected:
            if ssh_password is None:
                log.info("Can access resource without password")
            else:
                log.info("Can access resource")

        if successfully_connected is False:
            log.info("Can not access resource without password")
            action_list = [(
                "TryAgain",
                "The private and public keys were generated manually just now. Try again."
            )]
            # check private keys
            user_home_dir = os.path.expanduser("~")
            user_ssh_dir = os.path.join(user_home_dir, '.ssh')
            if os.path.isdir(user_ssh_dir):
                private_keys = [
                    os.path.join(user_ssh_dir, f[:-4])
                    for f in os.listdir(user_ssh_dir)
                    if os.path.isfile(os.path.join(user_ssh_dir, f))
                    and f[-4:] == '.pub'
                    and os.path.isfile(os.path.join(user_ssh_dir, f[:-4]))
                ]
            else:
                private_keys = []

            if len(private_keys) > 0:
                action_list.append(("UseExistingPrivateKey",
                                    "Use existing private and public key."))

            default_action = len(action_list)
            action_list.append(
                ("GenNewKey", "Generate new private and public key."))
            action_list.append(("UsePassword", "Use password directly."))
            log.empty_line()

            log.info("Select authentication method:\n" + "\n".join([
                "%3d  %s" % (i, desc)
                for i, (_, desc) in enumerate(action_list)
            ]))
            while True:
                log.log_input("Select option from list above:")
                try:
                    action = input("[%s] " % default_action)
                    if action.strip() == "":
                        action = default_action
                    else:
                        action = int(action)

                    if action < 0 or action >= len(action_list):
                        raise ValueError()
                    break
                except (ValueError, TypeError):
                    log.error("Incorrect entry, try again.")

            # do the action
            log.empty_line()
            if action_list[action][0] == "TryAgain":
                continue
            if action_list[action][0] == "UsePassword":
                log.log_input("Enter password for %s@%s:" %
                              (ssh_username, remote_access_node))
                ssh_password = getpass.getpass("")
                ask_for_user_name = not ask_for_user_name
                continue
            if action_list[action][0] == "UseExistingPrivateKey":
                log.info("Available private keys:" + "\n".join(
                    ["%3d  %s" % (i, p) for i, p in enumerate(private_keys)]))
                while True:
                    log.log_input("Select key number from list above:")
                    try:
                        i_key = input("")
                        i_key = int(i_key)

                        if i_key < 0 or i_key >= len(private_keys):
                            raise ValueError()
                        break
                    except (ValueError, TypeError):
                        log.error("Incorrect entry, try again.")
                ssh_private_key_file = private_keys[i_key]
                ask_for_user_name = not ask_for_user_name
                continue
            if action_list[action][0] == "GenNewKey":
                count = 0
                while True:
                    log.log_input(
                        "Enter password for %s@%s (will be used only during this session):"
                        % (ssh_username, remote_access_node))
                    ssh_password4thisSession = getpass.getpass("")
                    ssh_password = ssh_password4thisSession

                    if check_connection_to_resource():
                        break
                    count += 1
                    if count >= 3:
                        break

                # generate keys
                log.log_input("Enter private key name:")
                ssh_private_key_file = input("[id_rsa_%s]" % resource_name)
                if ssh_private_key_file.strip() == "":
                    ssh_private_key_file = "id_rsa_%s" % resource_name
                ssh_private_key_file = os.path.join(user_home_dir, '.ssh',
                                                    ssh_private_key_file)

                log.log_input(
                    "Enter passphrase for new key (leave empty for passwordless access):"
                )
                ssh_private_key_password = getpass.getpass("")

                if akrr.dry_run:
                    successfully_connected = True
                else:
                    ssh_password = None

                    os.system("ssh-keygen -t rsa -N \"%s\" -f %s" %
                              (ssh_private_key_password, ssh_private_key_file))
                    if ssh_private_key_password.strip() == "":
                        ssh_private_key_password = None
                    # copy keys
                    akrr.util.ssh.ssh_access(
                        remote_access_node,
                        ssh='ssh-copy-id',
                        username=ssh_username,
                        password=ssh_password4thisSession,
                        private_key_file=ssh_private_key_file,
                        private_key_password=None,
                        logfile=sys.stdout,
                        command='')
                    ask_for_user_name = not ask_for_user_name
                    continue

        if successfully_connected:
            break
        else:
            log.error("Incorrect resource access credential")

    if successfully_connected:
        log.empty_line()
        log.info("Connecting to " + resource_name)

        str_io = io.StringIO()
        try:
            sys.stdout = sys.stderr = str_io
            rsh = akrr.util.ssh.ssh_access(
                remote_access_node,
                ssh=remote_access_method,
                username=ssh_username,
                password=ssh_password,
                private_key_file=ssh_private_key_file,
                private_key_password=ssh_private_key_password,
                logfile=sys.stdout,
                command=None)
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
        except Exception as e:
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
            log.debug(str_io.getvalue())
            raise e

        log.info("              Done")
    log.empty_line()
    return successfully_connected
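
The reachability test above shells out with os.system and string concatenation. A minimal alternative sketch using subprocess with an argument list, which avoids shell interpolation of the host name; the -c/-w flags assume Linux iputils ping, as in the code above.

import subprocess


def can_ping(host: str, timeout_s: int = 2) -> bool:
    # Return True if a single ICMP echo to host succeeds.
    result = subprocess.run(
        ["ping", "-c", "1", "-w", str(timeout_s), host],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return result.returncode == 0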
Example No. 21
    def run(self,
            akrr_db: str = None,
            ak_db: str = None,
            xd_db: str = None,
            install_cron_scripts: bool = True,
            stand_alone: bool = False,
            akrr_home: str = None,
            generate_db_only: bool = False,
            update: bool = False,
            old_akrr_home: str = None,
            skip_update_completed_dirs=False,
            skip_update_db=False,
            skip_saving_db_for_update=False):
        """
        Setup or update AKRR

        Parameters
        ----------
        akrr_db: if none will use localhost:3306
        ak_db: if none will use ak_db
        xd_db: if none will use xd_db
        install_cron_scripts: install cron scripts
        stand_alone: run without XDMoD
        update: update current akrr installation
        akrr_home: custom location of akrr home
        generate_db_only: only generate DB
        update: perform update from previous stable version
        old_akrr_home: location of old AKRR home for update
        """
        hints_to_finish_update = ""
        if update:
            self.update = akrr.update.UpdateAKRR(old_akrr_home)

        # Set initial db conf
        if not update:
            if akrr_db is None:
                akrr_db = self.default_akrr_db
            # if ak_db and xd_db are not set, use akrr_db
            if ak_db is None:
                ak_db = akrr_db
            if xd_db is None:
                xd_db = akrr_db
        else:
            if akrr_db is None:
                # not set: reuse the values from the old configuration
                akrr_db = set_user_password_host_port_db(
                    self.update.old_cfg['akrr_db_user'],
                    self.update.old_cfg['akrr_db_passwd'],
                    self.update.old_cfg['akrr_db_host'],
                    self.update.old_cfg['akrr_db_port'],
                    self.update.old_cfg['akrr_db_name'])
            if ak_db is None:
                ak_db = set_user_password_host_port_db(
                    self.update.old_cfg['ak_db_user'],
                    self.update.old_cfg['ak_db_passwd'],
                    self.update.old_cfg['ak_db_host'],
                    self.update.old_cfg['ak_db_port'],
                    self.update.old_cfg['ak_db_name'])
            if xd_db is None:
                xd_db = set_user_password_host_port_db(
                    self.update.old_cfg['xd_db_user'],
                    self.update.old_cfg['xd_db_passwd'],
                    self.update.old_cfg['xd_db_host'],
                    self.update.old_cfg['xd_db_port'],
                    self.update.old_cfg['xd_db_name'])

        # Get db details
        self.akrr_db_user_name, self.akrr_db_user_password, self.akrr_db_host, self.akrr_db_port, self.akrr_db_name = \
            get_user_password_host_port_db(akrr_db, default_database="mod_akrr")

        self.ak_db_user_name, self.ak_db_user_password, self.ak_db_host, self.ak_db_port, self.ak_db_name = \
            get_user_password_host_port_db(ak_db, default_database="mod_appkernel")

        self.xd_db_user_name, self.xd_db_user_password, self.xd_db_host, self.xd_db_port, self.xd_db_name = \
            get_user_password_host_port_db(xd_db, default_database="modw")

        self.stand_alone = stand_alone
        self.generate_db_only = generate_db_only
        self.install_cron_scripts_flag = install_cron_scripts

        self.akrr_home_dir = akrr_home

        # check
        self.check_utils()

        # get directories layout
        global _akrr_dirs, _akrr_home, _akrr_cfg
        self._initial_akrr_dirs = _akrr_dirs
        self._akrr_dirs = akrr.get_akrr_dirs(self.akrr_home_dir)
        _akrr_dirs = self._akrr_dirs
        _akrr_home = _akrr_dirs["akrr_home"]
        _akrr_cfg = _akrr_dirs["akrr_cfg"]

        if self.update:
            # require that the old and new AKRR home directories are different
            if _akrr_home == self.update.old_akrr_home:
                log.error(
                    "Old and new AKRR home directories should be different. Rename the old AKRR home.\n"
                    + "\tOld AKRR home: %s\n\tNew AKRR home: %s",
                    self.update.old_akrr_home, _akrr_home)
                exit(1)
            # shut down old daemon, remove it from cron and update DB
            self.update.remove_old_akrr_from_crontab()
            self.update.shut_down_old_akrr()

        if not self.update:
            # check previous installation
            self.check_previous_installation()
        # set installation directory
        self.init_dir()

        if not self.update:
            # ask info
            self.read_db_user_credentials()

            if self.install_cron_scripts_flag and not self.generate_db_only:
                self.ask_cron_email()

            # if this is a dry run, all questions have been asked and there is nothing else to do
            self.init_mysql_dbs()

        self.generate_self_signed_certificate()
        cfg = self.generate_settings_file()

        if self.update:
            # copy old logs
            if not skip_update_completed_dirs:
                akrr.update.UpdateCompletedDirs(
                    self.update.old_cfg["completed_tasks_dir"],
                    cfg["completed_tasks_dir"]).run()

            # update DB
            if not skip_update_db:
                akrr.update.UpdateDataBase(self.update).update(
                    skip_saving_db_for_update=skip_saving_db_for_update)

            # update config files for resources and appkernels
            hints_to_finish_update = akrr.update.UpdateResourceAppConfigs(
                self.update).update()

        self.set_permission_on_files()
        self.db_check()

        if not self.update:
            self.generate_tables()

        if self.generate_db_only:
            log.info("AKRR DB Generated")
            return

        self.update_bashrc()

        self.start_daemon()
        self.check_daemon()
        if self.install_cron_scripts_flag:
            self.install_cron_scripts()

        log.info("AKRR is set up and is running.")
        if self.update:
            log.warning(
                "Below are instructions to finish the conversion "
                "(shell commands; execute them manually one by one to ensure a correct run):\n"
                + hints_to_finish_update)
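
A hedged sketch of invoking run() for a fresh stand-alone install. The class name CLISetup is hypothetical (the excerpt does not show which class defines run()), and the values are placeholders matching the defaults described in the docstring.

setup = CLISetup()  # hypothetical name for the class that defines run()
setup.run(
    akrr_db="localhost:3306",         # default noted in the docstring
    stand_alone=True,                 # run without XDMoD
    install_cron_scripts=False,
    akrr_home="/home/akrruser/akrr",  # placeholder custom AKRR home
)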
Example No. 22
    def install_cron_scripts(self):
        """
        Install cron scripts.
        """
        log.info("Installing cron entries")
        if dry_run:
            return

        if self.cron_email:
            mail = "MAILTO = " + self.cron_email
        else:
            mail = None
        restart = "50 23 * * * " + akrr_bin_dir + "/akrr daemon -cron restart"
        check_and_restart = "33 * * * * " + akrr_bin_dir + "/akrr daemon -cron checknrestart"

        try:
            crontab_content = subprocess.check_output("crontab -l", shell=True)
            crontab_content = crontab_content.decode("utf-8").splitlines(True)
        except Exception:
            log.info("User does not have a crontab yet")
            crontab_content = []

        mail_updated = False
        mail_there = False
        restart_there = False
        check_and_restart_there = False

        for i in range(len(crontab_content)):
            tmpstr = crontab_content[i]
            if len(tmpstr.strip()) > 1 and tmpstr.strip()[0] != "#":
                m = re.match(r'^MAILTO\s*=\s*(.*)', tmpstr.strip())
                if m:
                    cron_email = m.group(1)
                    cron_email = cron_email.replace('"', '')
                    mail_there = True
                    if self.cron_email != cron_email:
                        if mail:
                            crontab_content[i] = mail + "\n"
                        else:
                            crontab_content[i] = "#" + crontab_content[i]
                        mail_updated = True
                if tmpstr.count("akrr") and tmpstr.count("daemon") and tmpstr.count("restart") > 0:
                    restart_there = True
                if tmpstr.count("akrr") and tmpstr.count("daemon") and tmpstr.count("checknrestart") > 0:
                    check_and_restart_there = True
        if mail_updated:
            log.info("Cron's MAILTO was updated")
        if ((self.cron_email is not None and mail_there) or (
                self.cron_email is None and mail_there is False)) and restart_there and check_and_restart_there \
                and mail_updated is False:
            log.warning("All AKRR crond entries found. No modifications necessary.")
            return
        if self.cron_email is not None and mail_there is False:
            crontab_content.insert(0, mail + "\n")
        if restart_there is False:
            crontab_content.append(restart + "\n")
        if check_and_restart_there is False:
            crontab_content.append(check_and_restart + "\n")

        with open('.crontmp', 'w') as f:
            for tmpstr in crontab_content:
                f.write(tmpstr)
        subprocess.call("crontab .crontmp", shell=True)
        os.remove(".crontmp")
        log.info("Cron Scripts Processed!")