def wall_time_parsed(args):
    """
    Handle a 'walltime' request: update or list walltime records through the REST API.

    Reads ``list``, ``resource``, ``appkernel``, ``nodes``, ``walltime`` and
    ``comments`` from ``args``. Unless ``args.list`` is set, resource,
    appkernel, nodes and walltime are all required; otherwise an error is
    logged and the process exits with status 1.

    ``args.nodes`` may be a single node count or a comma separated list; one
    request is sent per node count. Communication errors are logged together
    with a traceback and are not re-raised.
    """
    if not args.list and not (args.resource and args.appkernel and args.nodes and args.walltime):
        log.error('Please provide a resource, app, node count and wall time.')
        exit(1)

    from akrr import akrrrestclient

    listing = args.list
    resource = args.resource
    app = args.appkernel
    nodes = args.nodes
    walltime = args.walltime
    comments = args.comments

    # In listing mode the node count is optional, so guard against None
    # before looking for a comma.
    if nodes and ',' in nodes:
        node_list = [node.strip() for node in nodes.split(',')]
    else:
        node_list = [nodes]

    url = '/walltime/%s/%s' % (resource, app)

    for node in node_list:
        data = {
            'resource_params': "{'nnodes':%d}" % (int(node),) if node else "{}",
            'app_param': '{}',
            'walltime': walltime,
            'comments': comments
        }
        try:
            if listing:
                result = akrrrestclient.get(url, data=data)
            else:
                result = akrrrestclient.post(url, data=data)

            if result.status_code == 200:
                if not listing:
                    # node is a string here, so it is formatted with %s; %d
                    # would raise TypeError and hide the successful update.
                    log.info(
                        'Successfully updated wall time (resource %s: application kernel: %s nodes: %s).',
                        resource, app, node)
                else:
                    log.info('Successfully queried walltime records. \n%s', result.text)
            else:
                log.error('something went wrong. %s:%s', result.status_code, result.text)
        except Exception as e:
            import traceback
            log.error(
                ''' An error occured while communicating with the REST API. %s: %s '''.strip(),
                e.args[0] if len(e.args) > 0 else '',
                e.args[1] if len(e.args) > 1 else '')
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
def new_task_parsed(args):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    Requires ``args.resource``, ``args.appkernel`` and ``args.nodes``; logs an
    error and exits with status 1 otherwise. ``args.nodes`` may be a single
    node count or a comma separated list; one task is POSTed to
    ``/scheduled_tasks`` per node count. When both ``args.time_start`` and
    ``args.time_end`` are given, the start time is picked with
    ``calculate_random_start_time``. Communication errors are logged and not
    re-raised.
    """
    if not (args.resource and args.appkernel and args.nodes):
        log.error('Please provide a resource, application and node count.')
        exit(1)

    from akrr import akrrrestclient

    resource = args.resource
    app = args.appkernel
    time_to_start = args.start_time
    time_start = args.time_start  # if args.time_start else '01:00'
    time_end = args.time_end  # if args.time_end else '05:00'
    repeat_in = args.periodicity
    nodes = args.nodes

    # A single value must stay one item: list("16") would give ['1', '6'].
    if ',' in nodes:
        node_list = [node.strip() for node in nodes.split(',')]
    else:
        node_list = [nodes]

    for node in node_list:
        if time_start is not None and time_end is not None:
            time_to_start = calculate_random_start_time(
                args.start_time, repeat_in, time_start, time_end)

        data = {
            'resource': resource,
            'app': app,
            'time_to_start': time_to_start,
            'repeat_in': repeat_in,
            'resource_param': "{'nnodes':%s}" % (node, )
        }
        try:
            result = akrrrestclient.post('/scheduled_tasks', data=data)
            if result.status_code == 200:
                log.info('Successfully submitted new task')
            else:
                log.error('something went wrong. %s:%s', result.status_code, result.text)
        except Exception as e:
            log.error(
                ''' An error occured while communicating with the REST API. %s: %s ''',
                e.args[0] if len(e.args) > 0 else '',
                e.args[1] if len(e.args) > 1 else '')
def submit_test_job(resource, app_name="test", nodes=2):
    """
    Submit a test run of ``app_name`` on ``resource`` through the AKRR REST API.

    :param resource: resource description; its ``'name'`` entry is sent as the
        resource name and the whole object is passed to
        ``get_test_job_lock_filename``.
    :param app_name: application kernel name.
    :param nodes: integer number of nodes requested for the test job.
    :return: task id extracted from the server response.

    Exits with status 1 when the server answers with a non-200 status. Any
    other failure is logged as critical and re-raised. On success the task id
    is written to the test job lock file.
    """
    # submit test job
    r = None
    try:
        payload = {
            'resource': resource['name'],
            'app': app_name,
            'resource_param': "{'nnodes':%d}" % nodes,
            'task_param': "{'test_run':True}"
        }
        r = akrrrestclient.post('/scheduled_tasks', data=payload)
        if r.status_code != 200:
            log.error(
                "Can not submit task through AKRR REST API ( %s )\nSee server response below\n%s\n",
                akrrrestclient.restapi_host, json.dumps(r.json(), indent=4))
            exit(1)
        task_id = r.json()['data']['data']['task_id']
    except Exception:
        if r is not None:
            # Report the raw response text: the failure being handled may be
            # the JSON decoding itself, and decoding again here would raise
            # inside the handler before anything gets logged.
            log.critical(
                "Can not submit task through AKRR REST API ( %s )\n"
                "Is it still running?\nSee full error report below\n%s",
                akrrrestclient.restapi_host, r.text)
        else:
            log.critical(
                "Can not submit task through AKRR REST API ( %s )\n"
                "Is it still running?\n",
                akrrrestclient.restapi_host)
        raise

    # write file with task_id
    test_job_lock_filename = get_test_job_lock_filename(resource, app_name)
    with open(test_job_lock_filename, "w") as fout:
        print(task_id, file=fout)

    log.info("\nSubmitted test job to AKRR, task_id is %d\n", task_id)
    return task_id
def task_new(resource, appkernel, nodes, time_to_start=None, periodicity=None,
             time_window_start=None, time_window_end=None, test_run=False,
             dry_run=False, gen_batch_job_only=False, app_param=None, task_param=None):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    :param resource: resource name.
    :param appkernel: application kernel name.
    :param nodes: node count or comma separated list of node counts (string);
        one task is submitted per node count.
    :param time_to_start: start time, normalised with
        ``get_formatted_time_to_start``.
    :param periodicity: sent to the server as ``repeat_in``.
    :param time_window_start: with ``time_window_end``, selects a start time
        through ``calculate_random_start_time``.
    :param time_window_end: see ``time_window_start``.
    :param test_run: add ``'test_run':True`` to the task parameters.
    :param dry_run: only log what would be submitted.
    :param gen_batch_job_only: call ``generate_batch_job_for_testing`` instead
        of submitting.
    :param app_param: inner part of the app_param dictionary string.
    :param task_param: inner part of the task_param dictionary string.
    :raises AkrrValueException: if ``time_to_start`` has an unknown format.

    Exceptions raised while talking to the REST API are logged and re-raised.
    """
    import pprint
    from akrr.util.time import calculate_random_start_time, get_formatted_time_to_start

    if ',' in nodes:
        node_list = [node.strip() for node in nodes.split(',')]
    else:
        node_list = [nodes]

    if time_to_start is not None:
        time_to_start = get_formatted_time_to_start(time_to_start)
        if time_to_start is None:
            raise AkrrValueException("Unknown date-time format for time to start!")

    for node in node_list:
        if time_window_start is not None and time_window_end is not None:
            time_to_start = calculate_random_start_time(
                time_to_start, periodicity, time_window_start, time_window_end)

        data = {
            'resource': resource,
            'app': appkernel,
            'time_to_start': time_to_start,
            'repeat_in': periodicity,
            'resource_param': "{'nnodes':%s}" % node
        }

        # Collect the task parameter fragments and join them with commas.
        task_param_parts = []
        if test_run:
            task_param_parts.append("'test_run':True")
        if task_param is not None:
            task_param_parts.append(task_param)
        s_task_param = ",".join(task_param_parts)
        if s_task_param != "":
            data['task_param'] = "{%s}" % s_task_param

        if app_param is not None:
            data['app_param'] = "{%s}" % app_param

        log.debug("Trying to submit: " + pprint.pformat(data))

        if dry_run:
            log.dry_run("Should submit following to REST API (POST to scheduled_tasks) %s" % data)

        if gen_batch_job_only:
            # This runs once per node count, so hand over the node count of
            # the current iteration rather than the whole comma separated
            # string (identical when only one node count was given).
            generate_batch_job_for_testing(resource, appkernel, node, dry_run=dry_run)

        if dry_run or gen_batch_job_only:
            continue

        # Imported after the dry-run check so dry runs never load the REST client.
        from akrr import akrrrestclient
        import json

        try:
            result = akrrrestclient.post('/scheduled_tasks', data=data)
            if result.status_code == 200:
                data_out = json.loads(result.text)["data"]["data"]
                log.info('Successfully submitted new task. The task id is %s.' % data_out["task_id"])
            else:
                log.error('something went wrong. %s:%s', result.status_code, result.text)
        except Exception as e:
            log.error(
                ''' An error occured while communicating with the REST API. %s: %s ''',
                e.args[0] if len(e.args) > 0 else '',
                e.args[1] if len(e.args) > 1 else '')
            raise
def task_delete_selection(resource: str = None, appkernel: str = None, nodes: str = None, group_id: str = None,
                          active_tasks=False, scheduled_tasks=False):
    """
    delete tasks from schedule

    Tasks are selected by any combination of the filters below; at least one
    filter must be given.

    :param resource: resource name to match.
    :param appkernel: application kernel name, or comma separated list of names.
    :param nodes: node count, or comma separated list of node counts; matched
        against the ``'nnodes':N`` entry stored in ``resource_param``.
    :param group_id: group id to match.
    :param active_tasks: delete matching tasks from ``active_tasks``
        (through ``delete_task``).
    :param scheduled_tasks: delete matching rows from ``scheduled_tasks``.
    :raises AkrrValueException: if no filter is given.
    :raises AkrrRestAPIException: if the scheduler can not be paused or resumed.

    The scheduler is asked not to start new tasks while the selection is
    processed and is asked to resume afterwards, also when processing fails.
    """
    from akrr import akrrrestclient
    from akrr.db import get_akrr_db
    from akrr.daemon import delete_task
    import time

    if not (resource or appkernel or nodes or group_id):
        raise AkrrValueException("Something out of resource/appkernel/nodes/group id should be set!")

    db, cur = get_akrr_db(dict_cursor=True)

    # ask scheduler not to start new tasks
    if akrrrestclient.post('/scheduler/no_new_tasks').status_code != 200:
        raise AkrrRestAPIException("Can not post scheduler/no_new_tasks")

    try:
        if active_tasks:
            # Now we need to wait till scheduler will be done checking active tasks
            while True:
                sql = "SELECT task_id FROM active_tasks WHERE task_lock > 0"
                log.debug(sql)
                cur.execute(sql)
                n_active_checking_task = len(cur.fetchall())
                if n_active_checking_task == 0:
                    break
                log.info("There are %d task which daemon is actively working on, waiting for it to pause.",
                         n_active_checking_task)
                time.sleep(5)

        # now daemon is not working on any tasks
        # now we can work with db
        # Conditions use %s placeholders and values travel separately, so the
        # database driver quotes them instead of string interpolation.
        where = []
        params = []
        if resource:
            where.append("resource=%s")
            params.append(resource)
        if appkernel:
            appkernel_list = [ak.strip() for ak in appkernel.split(',')]
            where.append("app IN (" + ",".join(["%s"] * len(appkernel_list)) + ")")
            params.extend(appkernel_list)
        if group_id:
            where.append("group_id=%s")
            params.append(group_id)

        # Each selection is a (conditions, values) pair executed on its own.
        if nodes:
            selections = []
            for node in [int(node.strip()) for node in nodes.split(',')]:
                # 'nnodes':N can be the last entry (followed by "}") or be
                # followed by further entries (followed by ",").
                for terminator in ("}", ","):
                    pattern = "%'nnodes':" + str(node) + terminator + "%"
                    selections.append((where + ["resource_param LIKE %s"], params + [pattern]))
        else:
            selections = [(where, params)]

        active_tasks_ids = []
        for sel_where, sel_params in selections:
            where_sql = " AND ".join(sel_where)
            if scheduled_tasks:
                sql = "DELETE FROM scheduled_tasks WHERE " + where_sql
                log.debug("%s; parameters: %s", sql, sel_params)
                cur.execute(sql, tuple(sel_params))
            if active_tasks:
                sql = "SELECT task_id FROM active_tasks WHERE " + where_sql
                log.debug("%s; parameters: %s", sql, sel_params)
                cur.execute(sql, tuple(sel_params))
                active_tasks_ids += [int(t['task_id']) for t in cur.fetchall()]

        if active_tasks:
            if not active_tasks_ids:
                log.info("No active tasks to delete")
            else:
                for task_id in active_tasks_ids:
                    log.info("Deleting task_id %d", task_id)
                    delete_task(task_id, remove_from_scheduled_queue=False, remove_from_active_queue=True,
                                remove_derived_task=False)

        if scheduled_tasks or active_tasks:
            db.commit()
    finally:
        # ask scheduler can start new tasks now
        # Done in finally so a failure above does not leave the scheduler paused.
        if akrrrestclient.post('/scheduler/new_tasks_on').status_code != 200:
            raise AkrrRestAPIException("Can not post scheduler/new_tasks_on")

    log.info("Done")
def task_new(resource: str, appkernel: str, nodes: str, time_to_start=None, periodicity=None,
             time_window_start=None, time_window_end=None, test_run=False,
             dry_run: bool = False, gen_batch_job_only: bool = False, app_param=None, task_param=None,
             n_runs: int = 1, group_id: str = ""):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    :param resource: resource name.
    :param appkernel: application kernel name, or ``"all"`` to submit every
        application kernel that is configured and enabled on ``resource``.
    :param nodes: node count, comma separated list of node counts, or
        ``"all"`` to use the ``num_of_nodes`` list from the configuration.
    :param time_to_start: start time, normalised with
        ``get_formatted_time_to_start``.
    :param periodicity: sent to the server as ``repeat_in``.
    :param time_window_start: with ``time_window_end``, selects a start time
        through ``calculate_random_start_time``.
    :param time_window_end: see ``time_window_start``.
    :param test_run: add ``'test_run':True`` to the task parameters.
    :param dry_run: only log what would be submitted.
    :param gen_batch_job_only: call ``generate_batch_job_for_testing`` instead
        of submitting.
    :param app_param: inner part of the app_param dictionary string.
    :param task_param: inner part of the task_param dictionary string.
    :param n_runs: added to the task parameters when larger than one; can not
        be combined with ``periodicity``.
    :param group_id: sent with the task when not empty.
    :raises AkrrValueException: unknown appkernel/resource for ``nodes="all"``,
        unknown time format, or ``n_runs > 1`` together with ``periodicity``.

    Exceptions raised while talking to the REST API are logged and re-raised.
    """
    import pprint
    from akrr.util.time import calculate_random_start_time, get_formatted_time_to_start

    if appkernel == "all":
        import akrr.cfg
        import akrr.app
        resource_app_enabled = akrr.app.app_get_enabled()
        appkernel_list = []
        for ak in akrr.cfg.apps.keys():
            if resource not in akrr.cfg.apps[ak]['appkernel_on_resource']:
                continue
            if resource not in resource_app_enabled:
                continue
            enabled_apps = resource_app_enabled[resource]["apps"]
            if ak not in enabled_apps:
                continue
            if "resource_app_enabled" not in enabled_apps[ak]:
                continue
            if not enabled_apps[ak]["resource_app_enabled"]:
                continue
            appkernel_list.append(ak)

        # Submit each enabled application kernel with the same settings.
        for ak in appkernel_list:
            task_new(
                resource, ak, nodes, time_to_start=time_to_start, periodicity=periodicity,
                time_window_start=time_window_start, time_window_end=time_window_end, test_run=test_run,
                dry_run=dry_run, gen_batch_job_only=gen_batch_job_only, app_param=app_param,
                task_param=task_param, n_runs=n_runs, group_id=group_id)
        return

    if nodes == "all":
        import akrr.cfg
        if appkernel not in akrr.cfg.apps:
            raise AkrrValueException("Unknown appkernel %s" % appkernel)
        app_on_resource = akrr.cfg.apps[appkernel]['appkernel_on_resource']
        if resource not in app_on_resource:
            raise AkrrValueException("Unknown resource %s for appkernel %s" % (resource, appkernel))
        # A resource specific node list takes precedence over the appkernel wide one.
        if "num_of_nodes" in app_on_resource[resource]:
            node_list = app_on_resource[resource]['num_of_nodes']
        else:
            node_list = akrr.cfg.apps[appkernel]['num_of_nodes']
    elif ',' in nodes:
        node_list = [node.strip() for node in nodes.split(',')]
    else:
        node_list = [nodes]

    if time_to_start is not None:
        time_to_start = get_formatted_time_to_start(time_to_start)
        if time_to_start is None:
            raise AkrrValueException("Unknown date-time format for time to start!")

    if n_runs > 1 and periodicity:
        raise AkrrValueException("n_runs larger than one can not be set with periodicity")

    for node in node_list:
        if time_window_start is not None and time_window_end is not None:
            time_to_start = calculate_random_start_time(
                time_to_start, periodicity, time_window_start, time_window_end)

        data = {
            'resource': resource,
            'app': appkernel,
            'time_to_start': time_to_start,
            'repeat_in': periodicity,
            'resource_param': "{'nnodes':%s}" % node
        }

        # Collect the task parameter fragments and join them with commas.
        task_param_parts = []
        if test_run:
            task_param_parts.append("'test_run':True")
        if n_runs > 1:
            task_param_parts.append("'n_runs':%d" % n_runs)
        if task_param is not None:
            task_param_parts.append(task_param)
        s_task_param = ",".join(task_param_parts)
        if s_task_param != "":
            data['task_param'] = "{%s}" % s_task_param

        if group_id != "":
            data['group_id'] = group_id

        if app_param is not None:
            data['app_param'] = "{%s}" % app_param

        log.debug("Trying to submit: " + pprint.pformat(data))

        if dry_run:
            log.dry_run("Should submit following to REST API (POST to scheduled_tasks) %s" % data)

        if gen_batch_job_only:
            # This runs once per node count, so hand over the node count of
            # the current iteration; the raw ``nodes`` argument may be a comma
            # separated list or the literal "all".
            generate_batch_job_for_testing(resource, appkernel, node, dry_run=dry_run)

        if dry_run or gen_batch_job_only:
            continue

        # Imported after the dry-run check so dry runs never load the REST client.
        from akrr import akrrrestclient
        import json

        try:
            result = akrrrestclient.post('/scheduled_tasks', data=data)
            if result.status_code == 200:
                data_out = json.loads(result.text)["data"]["data"]
                log.info('Successfully submitted new task. The task id is %s.' % data_out["task_id"])
            else:
                log.error('something went wrong. %s:%s', result.status_code, result.text)
        except Exception as e:
            log.error(
                ''' An error occured while communicating with the REST API. %s: %s ''',
                e.args[0] if len(e.args) > 0 else '',
                e.args[1] if len(e.args) > 1 else '')
            raise
def _ensure_app_kernel_db_entries(app_name):
    """
    Make sure app_name has a row in app_kernel_def and in app_kernels.

    Missing rows are inserted in the disabled state. Returns
    ``(db_ak, cur_ak, db, cur)``: the connection/cursor pairs obtained from
    ``akrr.db.get_ak_db`` and ``akrr.db.get_akrr_db``.
    """
    # add entry to mod_appkernel.resource
    db_ak, cur_ak = akrr.db.get_ak_db(True)
    cur_ak.execute('''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''', (app_name, ))
    if not cur_ak.fetchall():
        cur_ak.execute(
            "INSERT INTO app_kernel_def (name,ak_base_name,processor_unit,enabled, description, visible)"
            "VALUES(%s,%s,'node',0,%s,0);",
            (app_name, app_name, app_name))
        db_ak.commit()
    # Read the row back: its ak_def_id is used as the id of the app_kernels row.
    cur_ak.execute('''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''', (app_name, ))
    ak_def = cur_ak.fetchall()[0]

    # add entry to mod_akrr.resource
    db, cur = akrr.db.get_akrr_db(True)
    cur.execute('''SELECT * FROM app_kernels WHERE name=%s''', (app_name, ))
    if not cur.fetchall():
        cur.execute(
            '''INSERT INTO app_kernels (id,name,enabled,nodes_list) VALUES(%s,%s,0,'1,2,4,8');''',
            (ak_def['ak_def_id'], app_name))
        db.commit()
    return db_ak, cur_ak, db, cur


def app_validate(resource, appkernel, nnodes):
    """
    Validate the installation of an application kernel on a resource.

    Steps, in order:

    1. syntax check of the resource and application kernel configuration files;
    2. registration of the application kernel in the databases if missing;
    3. ssh connection to the resource and check of its directories;
    4. submission of a test job through the AKRR REST API (or monitoring of
       a previously submitted one recorded in the lock file) until it completes;
    5. analysis of the result and, on success, enabling of the application
       kernel on the resource.

    :param resource: resource name.
    :param appkernel: application kernel name.
    :param nnodes: integer number of nodes for the test job.

    Most failures are logged and end the process with ``exit(1)``; a failed
    connection to the resource is logged and re-raised.
    """
    from akrr.util.log import verbose
    from akrr import get_akrr_dirs

    resource_name = resource
    app_name = appkernel

    # error_count is never incremented below; failures exit immediately.
    error_count = 0
    warning_count = 0

    log.info("Validating " + app_name + " application kernel installation on " + resource_name)

    akrr_dirs = get_akrr_dirs()

    default_resource_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], "default.resource.conf"))
    resource_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['cfg_dir'], "resources", resource_name, "resource.conf"))

    default_app_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], "default.app.conf"))
    app_ker_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], app_name + ".app.conf"))

    ###############################################################################################
    # validating resource parameter file
    log.info("#" * 80)
    log.info("Validating %s parameters from %s" % (resource_name, resource_param_filename))

    if not os.path.isfile(resource_param_filename):
        log.error("resource parameters file (%s) do not exists!" % (resource_param_filename, ))
        exit(1)

    # check syntax
    # NOTE: the configuration files are executed as Python code.
    try:
        tmp = {}
        for filename in (default_resource_param_filename, resource_param_filename):
            with open(filename) as fin:
                exec(compile(fin.read(), filename, 'exec'), tmp)
    except Exception:
        log.exception("Can not load resource from " + resource_param_filename + "\n" +
                      "Probably invalid syntax.")
        exit(1)

    # check syntax
    try:
        tmp = {}
        for filename in (default_app_param_filename, app_ker_param_filename):
            with open(filename) as fin:
                exec(compile(fin.read(), filename, 'exec'), tmp)
    except Exception:
        log.exception("Can not load application kernel from " + app_ker_param_filename + "\n" +
                      "Probably invalid syntax")
        exit(1)

    # now we can load akrr
    from akrr import cfg
    from akrr import akrrrestclient
    from akrr.cli.resource_deploy import make_results_summary
    from akrr.cfg_util import load_app_default, load_app_on_resource

    resource = cfg.find_resource_by_name(resource_name)
    log.info("Syntax of %s is correct and all necessary parameters are present." % resource_param_filename)

    cfg.find_app_by_name(app_name)
    try:
        app_default = load_app_default(app_name)
        app = load_app_on_resource(app_name, resource_name, resource, app_default)
        pprint.pprint(app)
    except Exception as e:  # pylint: disable=broad-except
        log.exception("Exception occurred during updated app loading:" + str(e))
        exit(1)
    log.info("Syntax of %s is correct and all necessary parameters are present." % app_ker_param_filename)

    # check if AK is in DB
    _ensure_app_kernel_db_entries(app_name)

    ###############################################################################################
    # connect to resource
    log.info("#" * 80)
    log.info("Validating resource accessibility. Connecting to %s." % (resource['name']))
    if resource['ssh_private_key_file'] is not None and os.path.isfile(
            resource['ssh_private_key_file']) is False:
        log.error("Can not access ssh private key (%s)" % (resource['ssh_private_key_file'], ))
        exit(1)

    str_io = io.StringIO()
    try:
        try:
            # Capture everything printed while connecting; it is only shown on failure.
            sys.stdout = sys.stderr = str_io
            # Connect to resource
            # Spin-up instance before ssh it
            if resource['batch_scheduler'].lower() == "openstack":
                # Start instance if it is cloud
                openstack_server = akrr.util.openstack.OpenStackServer(resource=resource)
                resource['openstack_server'] = openstack_server
                openstack_server.create()
                resource['remote_access_node'] = openstack_server.ip
            if resource['batch_scheduler'].lower() == "googlecloud":
                # Start instance if it is cloud
                googlecloud_server = akrr.util.googlecloud.GoogleCloudServer(resource=resource)
                resource['googlecloud_server'] = googlecloud_server
                googlecloud_server.create()
                resource['remote_access_node'] = googlecloud_server.ip

            rsh = akrr.util.ssh.ssh_resource(resource)
        finally:
            # Always restore the real streams, whatever happened above.
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
    except Exception:
        msg2 = str_io.getvalue()
        msg2 += "\n" + traceback.format_exc()
        msg = "Can not connect to " + resource['name'] + "\n" + \
              "Probably invalid credential, see full error report below\n" + msg2
        log.error(msg)
        raise
    print("=" * 80)
    log.info("Successfully connected to %s\n\n" % (resource['name']))

    ###############################################################################################
    log.info("Checking directory locations\n")

    # These two directories must exist (they are created when missing).
    for d in (resource['akrr_data'], resource['appkernel_dir']):
        log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
        status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
        log.info(msg + "\n")

    # Scratch locations are only checked; a failed check is a warning.
    for d, label in ((resource['network_scratch'], "network scratch"),
                     (resource['local_scratch'], "local scratch")):
        log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
        status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=False)
        if status is True:
            log.info(msg)
        else:
            log.warning(msg)
            log.warning(
                ("WARNING %d: " + label + " might be have a different location " +
                 "on head node, so if it is by design it is ok") % (warning_count + 1))
            warning_count += 1
        log.info("")

    # close connection we don't need it any more
    rsh.close(force=True)
    del rsh

    # Delete openstack instance after tests
    if resource['batch_scheduler'].lower() == "openstack":
        # delete instance if it is cloud
        resource['openstack_server'].delete()
        resource['remote_access_node'] = None
    if resource['batch_scheduler'].lower() == "googlecloud":
        # delete instance if it is cloud
        resource['googlecloud_server'].delete()
        resource['remote_access_node'] = None

    ###############################################################################################
    # send test job to queue
    log.info("#" * 80)
    log.info("Will send test job to queue, wait till it executed and will analyze the output")

    print("Will use AKRR REST API at", akrrrestclient.restapi_host)
    # get check connection
    try:
        r = akrrrestclient.get('/scheduled_tasks')
        if r.status_code != 200:
            log.error(
                "Can not get token for AKRR REST API ( %s )\nSee server response below:\n %s",
                akrrrestclient.restapi_host, json.dumps(r.json(), indent=4))
            exit(1)
    except Exception:
        log.error(
            "Can not connect to AKRR REST API ( %s )\nIs it running?\nSee full error report below:\n%s",
            akrrrestclient.restapi_host, traceback.format_exc())
        exit(1)

    # check if the test job is already submitted
    task_id = None
    test_job_lock_filename = os.path.join(cfg.data_dir, resource_name + "_" + app_name + "_test_task.dat")
    if os.path.isfile(test_job_lock_filename):
        with open(test_job_lock_filename, "r") as fin:
            task_id = int(fin.readline())

        r = akrrrestclient.get('/tasks/' + str(task_id))
        if r.status_code != 200:
            # The recorded task is not known to the server: submit a new one.
            task_id = None
        else:
            log.warning(
                "\nWARNING %d: Seems this is rerun of this script, will monitor task with task_id = "
                % (warning_count + 1) + str(task_id))
            log.warning("To submit new task delete " + test_job_lock_filename + "\n")
            warning_count += 1
        # check how old is it

    # submit test job
    if task_id is None:
        try:
            payload = {
                'resource': resource_name,
                'app': app_name,
                'resource_param': "{'nnodes':%d}" % nnodes,
                'task_param': "{'test_run':True}"
            }
            r = akrrrestclient.post('/scheduled_tasks', data=payload)
            if r.status_code != 200:
                # The server response needs its own placeholder, otherwise
                # logging fails to format the record.
                log.error(
                    "Can not submit task through AKRR REST API ( %s )\nSee server response below\n%s",
                    akrrrestclient.restapi_host, json.dumps(r.json(), indent=4))
                exit(1)
            task_id = r.json()['data']['data']['task_id']
        except Exception:
            log.error(
                "Can not submit task through AKRR REST API ( %s )\nIs it still running?\n"
                "See full error report below:\n%s",
                akrrrestclient.restapi_host, traceback.format_exc())
            exit(1)
        # write file with tast_id
        with open(test_job_lock_filename, "w") as fout:
            print(task_id, file=fout)
        log.info("\nSubmitted test job to AKRR, task_id is " + str(task_id) + "\n")

    # now wait till job is done
    msg_body0 = ""
    while True:
        t = datetime.datetime.now()
        r = akrrrestclient.get('/tasks/' + str(task_id))

        # queue stays None when the server did not answer with 200, so a
        # failed poll can never be mistaken for a completed task.
        queue = None
        msg_body = "=" * 80
        if r.status_code == 200:
            response_json = r.json()
            queue = response_json["data"]["queue"]
            task_data = response_json["data"]["data"]

            msg_body += "\nTast status:\n"

            if queue == "scheduled_tasks":
                msg_body += "Task is in scheduled_tasks queue.\n"
                msg_body += "It schedule to be started on " + task_data['time_to_start'] + "\n"
            elif queue == "active_tasks":
                msg_body += "Task is in active_tasks queue.\n"
                msg_body += "Status: " + str(task_data['status']) + "\n"
                msg_body += "Status info:\n" + str(task_data['status_info']) + "\n"
            elif queue == "completed_tasks":
                msg_body += "Task is completed!\n"
                completed_tasks = task_data['completed_tasks']
                akrr_xdmod_instanceinfo = task_data['akrr_xdmod_instanceinfo']
                if verbose:
                    msg_body += "completed_tasks table entry:\n" + pp.pformat(completed_tasks) + "\n"
                    msg_body += "akrr_xdmod_instanceinfo table entry:\n" + pp.pformat(
                        akrr_xdmod_instanceinfo) + "\n"
                    msg_body += 'output parsing results:\n' + akrr_xdmod_instanceinfo['body'] + "\n"
                else:
                    msg_body += "\tstatus: " + str(akrr_xdmod_instanceinfo['status']) + "\n"
                    if akrr_xdmod_instanceinfo['status'] == 0:
                        msg_body += "\tstatus2: " + completed_tasks['status'] + "\n"
                    msg_body += "\tstatus_info: " + completed_tasks['status_info'] + "\n"
        else:
            msg_body += "\n" + r.text + "\n"

        tail_msg = "time: " + t.strftime("%Y-%m-%d %H:%M:%S")

        # Print the full status only when it changed; otherwise just refresh
        # the time stamp on the current line.
        if msg_body != msg_body0:
            print("\n\n" + msg_body)
            print(tail_msg, end=' ')
        else:
            print("\r" + tail_msg, end=' ')
        sys.stdout.flush()

        msg_body0 = msg_body

        if queue == "completed_tasks":
            break

        # try to update:
        try:
            payload = {'next_check_time': ''}
            akrrrestclient.put('/active_tasks/' + str(task_id), data=payload)
        except Exception:
            # Best effort only: the next poll shows the task state anyway.
            pass
        time.sleep(5)

    ###############################################################################################
    # analysing the output
    log.info("Test job is completed analyzing output\n")
    r = akrrrestclient.get('/tasks/' + str(task_id))
    if r.status_code != 200:
        log.error(
            "Can not get information about task\nSee full error report below\n%s",
            "AKRR server response:\n" + r.text)
        exit(1)

    task_data = r.json()['data']['data']
    completed_tasks = task_data['completed_tasks']
    akrr_xdmod_instanceinfo = task_data['akrr_xdmod_instanceinfo']
    akrr_errmsg = task_data['akrr_errmsg']

    results_summary = make_results_summary(
        resource_name, app_name, completed_tasks, akrr_xdmod_instanceinfo, akrr_errmsg)

    # execution was not successful
    if "ERROR" in completed_tasks['status']:
        if "ERROR Can not created batch job script and submit it to remote queue" in completed_tasks['status']:
            log.error("Can not created batch job script and/or submit it to remote queue\n" +
                      "See full error report below:\n" + results_summary)
        else:
            log.error(completed_tasks['status'] + "\n" +
                      "See full error report below:\n" + results_summary)
        os.remove(test_job_lock_filename)
        exit(1)

    # execution was not successful
    if akrr_xdmod_instanceinfo['status'] == 0:
        log.error("Task execution was not successful\n" +
                  "See full error report below:\n" + results_summary)
        os.remove(test_job_lock_filename)
        exit(1)

    # see what is in report
    # The results are unused; the lookups fail if the report has no
    # 'benchmark' element.
    elm_perf = XMLElementTree.fromstring(akrr_xdmod_instanceinfo['body'])
    elm_perf.find('benchmark').find('parameters')
    elm_perf.find('benchmark').find('statistics')

    log.info("\nTest kernel execution summary:")
    print(results_summary)
    print()
    # log.info("\nThe output looks good.\n")

    if error_count == 0:
        # enabling resource for execution
        log.info("\nEnabling %s on %s for execution\n" % (app_name, resource_name))
        try:
            result = akrrrestclient.put(
                '/resources/%s/on' % (resource_name, ), data={'application': app_name})
            if result.status_code == 200:
                log.info("Successfully enabled %s on %s" % (app_name, resource_name))
            else:
                log.error("Can not turn-on %s on %s\n%s", app_name, resource_name, result.text)
                exit(1)

            # Rows are created when missing, then switched on in both databases.
            db_ak, cur_ak, db, cur = _ensure_app_kernel_db_entries(app_name)

            cur_ak.execute(
                '''UPDATE app_kernel_def SET enabled=1,visible=1 WHERE ak_base_name=%s''', (app_name, ))
            db_ak.commit()

            cur.execute('''UPDATE app_kernels SET enabled=1 WHERE name=%s''', (app_name, ))
            db.commit()
        except Exception:
            log.exception("Can not turn-on %s on %s", app_name, resource_name)
            exit(1)

    if error_count > 0:
        log.error("There are %d errors, fix them.", error_count)
    if warning_count > 0:
        log.warning(
            "\nThere are %d warnings.\nif warnings have sense (highlighted in yellow), you can move to next step!\n"
            % warning_count)
    if error_count == 0 and warning_count == 0:
        log.info("\nDONE, you can move to next step!\n")
    os.remove(test_job_lock_filename)