def get(el_type, error=None):
    """Render the admin page for one element type.

    Queries every connected 'raspberry' agent for (1) the properties
    required to register new elements of type *el_type* and (2) the list
    of the existing elements, then renders 'admin.html'.

    el_type -- type of the elements to manage (e.g. 'node', 'switch')
    error   -- optional error (str or dict) forwarded to the template
    """
    result = {"errors": []}
    db = open_session()
    agents = db.query(Agent).filter(Agent.type == "raspberry").filter(
        Agent.state == "connected").all()
    for w in agents:
        result[w.name] = {"properties": [], "existing": {}}
        try:
            # Get the element properties to register new elements
            r = requests.post(url="http://%s:%s/v1/admin/add/%s" % (w.ip, w.port, el_type),
                              timeout=6,
                              json={"token": w.token})
            if r.status_code != 200 or "missing" not in r.json():
                result["errors"].append("wrong answer from the agent '%s'" % w.name)
            else:
                # Keep a stable local reference: the previous version kept
                # iterating result[w.name]["properties"] after reassigning it
                # to a string, which raised a TypeError on the next property.
                props = r.json()["missing"]
                result[w.name]["properties"] = props
                for prop in props:
                    if "no_values" in props[prop]:
                        result[w.name]["properties"] = \
                            "Missing '%s' elements to create '%s' elements." % (prop, el_type)
                        # Stop: 'properties' is now a message string
                        break
            # Get the existing elements
            r = requests.post(url="http://%s:%s/v1/user/%s/list" % (w.ip, w.port, el_type),
                              timeout=6,
                              json={"token": w.token})
            if r.status_code != 200:
                result["errors"].append(
                    "can not get the list of %ss from the agent '%s'" % (el_type, w.name))
            else:
                node_info = r.json()
                for el_name in node_info:
                    one_node = node_info[el_name]
                    one_node["name"] = el_name
                    result[w.name]["existing"][el_name] = one_node
        except Exception:
            # Network errors and malformed JSON answers both land here;
            # narrowed from a bare 'except' so Ctrl-C is not swallowed
            result["errors"].append("can not connect to the agent '%s'" % w.name)
            logging.exception("can not connect to the agent '%s'" % w.name)
    close_session(db)
    # Sort the existing elements of every agent by name
    for agent in result:
        if agent != "errors":
            result[agent]["existing"] = sort_by_name(result[agent]["existing"])
    if isinstance(error, dict) or (error is not None and len(error) > 0):
        return flask.render_template("admin.html",
                                     admin=current_user.is_admin,
                                     active_btn="admin_%s" % el_type,
                                     elem_type=el_type,
                                     elements=result,
                                     msg=error)
    if len(result["errors"]) == 0:
        return flask.render_template("admin.html",
                                     admin=current_user.is_admin,
                                     active_btn="admin_%s" % el_type,
                                     elem_type=el_type,
                                     elements=result)
    else:
        return flask.render_template("admin.html",
                                     admin=current_user.is_admin,
                                     active_btn="admin_%s" % el_type,
                                     elem_type=el_type,
                                     elements=result,
                                     msg=",".join(result["errors"]))
for node in old_nodes: # Try to rescue lost nodes if node.state == "lost" and node.temp_info is not None: state_name = node.temp_info.replace("_post", "") process_name = get_process_from_state(state_name, node.environment) if len(process_name) == 0: logger.error("[%s] No process with the state '%s'" % (node.node_name, state_name)) else: node.process = process_name node.state = state_name node.temp_info = None # Update the time of nodes that are deploying if node.state != "lost": node.updated_at = datetime.now() close_session(db_session) logger.info("# Analyzing the node states") while not os.path.isfile(STOP_FILE): try: # Retrieve the running deployments db_session = open_session() pending_nodes = db_session.query(Deployment).filter( Deployment.state != "destroyed").filter( Deployment.state != "initialized").filter( Deployment.state != "deployed").filter( Deployment.state != "lost").filter( Deployment.state != "booted").all() if len(pending_nodes) > 0: logger.info("## Nb. of pending nodes: %d" % len(pending_nodes)) # Sort the nodes according the list of states sorted_nodes = {key: [] for key in state_desc.keys()}
def power_get_helper(agent_name, switch_name, period):
    """Return the power consumption of the ports of one switch.

    agent_name  -- name of a *connected* agent managing the switch
    switch_name -- name of the switch to query
    period      -- duration string whose last character is the unit:
                   s (seconds), m (minutes), h (hours), d (days)

    Returns {switch: {port: {"node": ..., "consumptions": [...]}}} where
    ports and switches without any non-zero consumption are removed.
    On bad input, returns an error structure instead.
    """
    result = {}
    # Check the period unit; an empty period is rejected too
    # (the previous version crashed on period[-1] for empty strings)
    if len(period) == 0 or period[-1] not in ["s", "m", "h", "d"]:
        return {"error": "wrong unit for the period parameter"}
    # Get the agent information
    db = open_session()
    agent = db.query(Agent).filter(Agent.state == "connected").filter(
        Agent.name == agent_name).first()
    if agent is None:
        close_session(db)
        # NOTE(review): this error is returned as a JSON *string* while the
        # period error above returns a dict -- callers must handle both forms
        return json.dumps({"error": "agent '%s' does not exist" % agent_name})
    r = requests.post(url="http://%s:%s/v1/user/node/list" % (agent.ip, agent.port),
                      timeout=POST_TIMEOUT,
                      json={"token": agent.token})
    if r.status_code == 200:
        # Retrieve the name of the nodes plugged to the requested switch
        node_list = r.json()
        for n in node_list:
            my_switch = node_list[n]["switch"]
            my_port = str(node_list[n]["port_number"])
            if my_switch == switch_name:
                if my_switch not in result:
                    result[my_switch] = {}
                result[my_switch][my_port] = {"node": n, "consumptions": []}
    # Retrieve the port consumptions of the switch
    r = requests.post(url="http://%s:%s/v1/user/switch/consumption" % (agent.ip, agent.port),
                      timeout=POST_TIMEOUT,
                      json={
                          "token": agent.token,
                          "switch": switch_name,
                          "period": period
                      })
    if r.status_code == 200:
        r_json = r.json()
        for cons in r_json:
            my_switch = cons["switch"]
            my_port = cons["port"]
            if my_switch not in result:
                result[my_switch] = {}
            if my_port not in result[my_switch]:
                result[my_switch][my_port] = {"consumptions": []}
            result[my_switch][my_port]["consumptions"].append({
                "time": cons["time"],
                "consumption": float(cons["consumption"])
            })
    # Remove the switch ports without consumptions or with every consumption equal to 0
    for switch in list(result.keys()):
        switch_w_cons = False
        for port in list(result[switch].keys()):
            port_w_cons = False
            for cons in result[switch][port]["consumptions"]:
                if cons["consumption"] > 0:
                    port_w_cons = True
                    switch_w_cons = True
            if not port_w_cons:
                del result[switch][port]
        if not switch_w_cons:
            del result[switch]
    close_session(db)
    return result
"""Database bootstrap script: create the tables and register the admin user."""
from database.base import DB_URL
from database.tables import User
from database.connector import create_tables, open_session, close_session
import logging

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)-8s %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logging.info("Create the tables from the URL '%s'" % DB_URL)
if not create_tables():
    logging.error("Fail to initialize the database")
else:
    logging.info("Database initialization complete")
    # Register the default administrator account
    session = open_session()
    admin_user = User()
    admin_user.email = "*****@*****.**"
    admin_user.password = (
        "sha256$kFAl3lBK$d48c4bd2ff12742351b219af6c4aff6d4cb9893b9b6146f6ea8d8a06c7a9a436")
    admin_user.is_authorized = True
    admin_user.is_admin = True
    session.add(admin_user)
    close_session(session)
def make_reserve():
    """Reserve nodes on the agents according to the POSTed filters.

    Expects flask.request.json with 'filters' (a list of dicts holding
    'nb_nodes' plus either an 'agent' name or an agent 'type'), a
    'start_date' and a 'duration'.
    Returns a JSON string {"total_nodes": int, "wanted": int, "errors": [...]}.
    """
    result = {"total_nodes": 0, "wanted": 0, "errors": []}
    db = open_session()
    for f in flask.request.json["filters"]:
        result["wanted"] += f["nb_nodes"]
        if "agent" in f:
            # Reserve nodes on one specific agent
            agent = db.query(Agent).filter(Agent.name == f["agent"]).filter(
                Agent.state == "connected").first()
            if agent is not None:
                # Make the reservation
                r_data = post_data(db, current_user.email, agent.type, agent.token)
                r_data["filter"] = f
                r_data["start_date"] = flask.request.json["start_date"]
                r_data["duration"] = flask.request.json["duration"]
                r = requests.post(url="http://%s:%s/v1/user/reserve" % (agent.ip, agent.port),
                                  timeout=POST_TIMEOUT,
                                  json=r_data)
                if r.status_code == 200:
                    r_json = r.json()
                    if "nodes" in r_json:
                        result["total_nodes"] += len(r_json["nodes"])
                    if "error" in r_json:
                        result["errors"].append(r_json["error"])
                else:
                    logging.error(
                        "can not reserve nodes: wrong return code %d from '%s'" %
                        (r.status_code, agent.name))
            else:
                logging.error(
                    "can not find the agent '%s' (maybe it is disconnected)" % f["agent"])
        elif "type" in f:
            # Reserve nodes on every connected agent of the given type until
            # the requested number of nodes is reached
            agent_type = f["type"]
            del f["type"]
            for agent in db.query(Agent).filter(
                    Agent.type == agent_type).filter(
                    Agent.state == "connected").all():
                if f["nb_nodes"] > 0:
                    # Make the reservation
                    r_data = post_data(db, current_user.email, agent.type, agent.token)
                    r_data["filter"] = f
                    r_data["start_date"] = flask.request.json["start_date"]
                    r_data["duration"] = flask.request.json["duration"]
                    r = requests.post(url="http://%s:%s/v1/user/reserve" % (agent.ip, agent.port),
                                      timeout=POST_TIMEOUT,
                                      json=r_data)
                    if r.status_code == 200:
                        r_json = r.json()
                        # Guard against agents that answer 200 without a
                        # 'nodes' key (the previous version raised KeyError)
                        if "nodes" in r_json:
                            nb_nodes = len(r_json["nodes"])
                            result["total_nodes"] += nb_nodes
                            f["nb_nodes"] -= nb_nodes
                        if "error" in r_json:
                            result["errors"].append(r_json["error"])
                    else:
                        logging.error(
                            "can not reserve nodes: wrong return code %d from '%s'" %
                            (r.status_code, agent.name))
        else:
            logging.error(
                "node reservation failure: no 'type' property and no 'agent' property in the POST data"
            )
    close_session(db)
    return json.dumps(result)
def power_data_helper(switch_name=None, period_str=None):
    """Collect the power consumption of every node port, grouped by
    agent / switch / port.

    switch_name -- optional switch filter forwarded to the agents
    period_str  -- optional period string forwarded to the agents

    Returns {agent: {switch: {port: {"node": ..., "consumptions": [...]}}}}
    with every agent/switch/port pruned when all consumptions are 0 or absent.
    """
    result = node_list_helper()["nodes"]
    node_data = {}
    # Index the known nodes by agent / switch / port
    for r in result:
        my_agent = result[r]["agent"]
        my_switch = result[r]["switch"]
        my_port = str(result[r]["port_number"])
        if my_agent not in node_data:
            node_data[my_agent] = {}
        if my_switch not in node_data[my_agent]:
            node_data[my_agent][my_switch] = {}
        node_data[my_agent][my_switch][my_port] = {
            "node": r,
            "consumptions": []
        }
    # Add the power consumption of every port
    db = open_session()
    for agent_name in node_data:
        agent = db.query(Agent).filter(Agent.name == agent_name).first()
        r_data = post_data(db, current_user.email, agent.type, agent.token)
        if isinstance(period_str, str) and len(period_str) > 0:
            r_data["period"] = period_str
        if isinstance(switch_name, str) and len(switch_name) > 0:
            r_data["switch"] = switch_name
        r = requests.post(url="http://%s:%s/v1/user/switch/consumption" % (agent.ip, agent.port),
                          timeout=POST_TIMEOUT,
                          json=r_data)
        if r.status_code == 200:
            for cons in r.json():
                my_switch = cons["switch"]
                my_port = cons["port"]
                # 'agent_name' is necessarily a key of node_data (we iterate
                # over it), so only the switch/port levels may be missing
                if my_switch not in node_data[agent_name]:
                    node_data[agent_name][my_switch] = {}
                if my_port not in node_data[agent_name][my_switch]:
                    node_data[agent_name][my_switch][my_port] = {
                        "consumptions": []
                    }
                node_data[agent_name][my_switch][my_port]["consumptions"].append({
                    "time": cons["time"],
                    "consumption": float(cons["consumption"])
                })
        else:
            logging.error(
                "Can not retrieve power monitoring values from agent '%s' (status code: %d)"
                % (agent_name, r.status_code))
    close_session(db)
    # Remove the switch ports without consumptions or with every consumption equal to 0
    for agent in list(node_data.keys()):
        agent_w_cons = False
        for switch in list(node_data[agent].keys()):
            switch_w_cons = False
            for port in list(node_data[agent][switch].keys()):
                port_w_cons = False
                for cons in node_data[agent][switch][port]["consumptions"]:
                    if cons["consumption"] > 0:
                        port_w_cons = True
                        switch_w_cons = True
                        agent_w_cons = True
                if not port_w_cons:
                    del node_data[agent][switch][port]
            if not switch_w_cons:
                del node_data[agent][switch]
        if not agent_w_cons:
            del node_data[agent]
    return node_data
def add_switch():
    """Register a new switch from the POSTed properties.

    Checks that every required property is present, that the switch answers
    to ping and to SNMP requests, then reserves a free IP range for the
    nodes plugged to the switch and records the switch to the database.
    Returns a JSON string with either the switch name, the failed checks,
    the missing properties or an error message.
    """
    switch_data = flask.request.json
    del switch_data["token"]
    # Required properties to create switches
    switch_props = [str(c).split(".")[1] for c in RaspSwitch.__table__.columns]
    # Remove computed properties
    switch_props.remove("port_number")
    switch_props.remove("oid_offset")
    switch_props.remove("first_ip")
    # Check if all properties belong to the POST data
    missing_data = dict([(key_data, []) for key_data in switch_props
                         if key_data not in switch_data.keys()])
    if len(missing_data) == 0:
        checks = {}
        for data in switch_data:
            checks[data] = {"value": switch_data[data]}
        # Delete any switch already registered under the same name
        db = open_session()
        existing = db.query(RaspSwitch).filter(
            RaspSwitch.name == switch_data["name"]).all()
        for to_del in existing:
            db.delete(to_del)
        # Close the session now: the previous version leaked it, so the
        # deletes above were never committed
        close_session(db)
        # Check the IP
        cmd = 'ping -c 1 -W 1 %s' % switch_data['ip']
        process = subprocess.run(cmd, shell=True,
                                 stdout=subprocess.DEVNULL,
                                 stderr=subprocess.DEVNULL)
        ip_check = process.returncode == 0
        checks["ip"]["check"] = ip_check
        snmp_check = False
        if ip_check:
            # Remove the last digit of the OID
            root_oid = switch_data["poe_oid"]
            root_oid = root_oid[:root_oid.rindex(".")]
            switch_info = switch_test(switch_data["ip"], switch_data["community"], root_oid)
            # Check the SNMP connection
            snmp_check = switch_info["success"]
            checks["community"]["check"] = snmp_check
            checks["poe_oid"]["check"] = snmp_check
        if ip_check and snmp_check:
            db = open_session()
            # Get information about existing switches to reserve the IP range
            # for the nodes connected to the new switch
            all_switches = db.query(RaspSwitch).order_by(RaspSwitch.first_ip).all()
            existing_info = {}
            for sw in all_switches:
                existing_info[sw.name] = {
                    "port_number": sw.port_number,
                    "first_ip": sw.first_ip
                }
            # Choose the last digit of the first IP such as
            # [last_digit, last_digit + port_number] is available
            last_digit = 1
            for sw in existing_info.values():
                new_last = last_digit + switch_info["port_number"] - 1
                if new_last < sw["first_ip"]:
                    # We found the last_digit value
                    break
                else:
                    last_digit = sw["first_ip"] + sw["port_number"]
            if last_digit + switch_info["port_number"] - 1 > 250:
                close_session(db)
                msg = "No IP range available for the switch '%s' with %d ports" % (
                    switch_data["name"], switch_info["port_number"])
                logging.error(msg)
                return json.dumps({"error": msg})
            # Add the switch
            new_switch = RaspSwitch()
            new_switch.name = switch_data["name"]
            new_switch.ip = switch_data["ip"]
            new_switch.community = switch_data["community"]
            new_switch.port_number = switch_info["port_number"]
            new_switch.first_ip = last_digit
            new_switch.master_port = switch_data["master_port"]
            new_switch.poe_oid = switch_info["poe_oid"]
            new_switch.oid_offset = switch_info["offset"]
            # Remove the last digit of the OID
            power_oid = switch_data["power_oid"]
            new_switch.power_oid = power_oid[:power_oid.rindex(".")]
            db.add(new_switch)
            close_session(db)
            return json.dumps({"switch": switch_data["name"]})
        else:
            return json.dumps({"check": checks})
    else:
        return json.dumps({"missing": missing_data})
def node_mine(arg_dict):
    """Return the IoT-Lab jobs (and their assigned nodes) of one user.

    arg_dict -- must contain 'user' (an email), 'iot_user' and
                'iot_password' (encrypted); otherwise a JSON string
                describing the expected parameters is returned.

    Returns a JSON string {"states": [...], "nodes": {job_id: {...}}}.
    """
    if "user" not in arg_dict or "@" not in arg_dict["user"] or \
        "iot_user" not in arg_dict or "iot_password" not in arg_dict:
        return json.dumps({
            "parameters": {
                "user": "******",
                "iot_user": "******",
                "iot_password": "******"
            }
        })
    result = {"states": [], "nodes": {}}
    # Get the list of the states for the 'deploy' process
    py_module = import_module("%s.states" % get_config()["node_type"])
    PROCESS = getattr(py_module, "PROCESS")
    for p in PROCESS["deploy"]:
        if len(p["states"]) > len(result["states"]):
            result["states"] = p["states"]
    # Get the existing jobs for this user
    db = open_session()
    schedule = db.query(Schedule).filter(
        Schedule.owner == arg_dict["user"]).all()
    db_jobs = {sch.node_name: sch for sch in schedule}
    # NOTE(review): the clear password appears on the command line and is
    # therefore visible in 'ps' on the host -- consider the CLI's auth file
    cmd = "iotlab-experiment -u %s -p %s get -l" % (
        arg_dict["iot_user"], decrypt_password(arg_dict["iot_password"]))
    process = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
                             stderr=subprocess.DEVNULL, universal_newlines=True)
    json_data = json.loads(process.stdout)["items"]
    now = time.time()
    for resa in json_data:
        id_str = str(resa["id"])
        # Manage the jobs registered in the DB
        if id_str in db_jobs:
            # Delete from the DB the jobs that ended more than 24 hours ago
            if db_jobs[id_str].end_date < now - 24 * 3600:
                db.delete(db_jobs[id_str])
                # Bug fix: the filter used to be 'IotNodes.job_id' (column
                # truthiness), which matched an arbitrary row instead of the
                # node list of this job
                nodes = db.query(IotNodes).filter(
                    IotNodes.job_id == resa["id"]).first()
                if nodes is not None:
                    db.delete(nodes)
                # Do not analyze this job
                my_sch = None
            else:
                my_sch = db_jobs[id_str]
        else:
            start_date = datetime.strptime(resa["start_date"], "%Y-%m-%dT%H:%M:%SZ")
            start_date = start_date.replace(tzinfo=pytz.UTC)
            end_time = start_date.timestamp() + resa["submitted_duration"] * 60
            # Check if the job terminated within the last 24 hours
            if end_time > now - 24 * 3600:
                # Register the job to the DB
                schedule = Schedule()
                schedule.node_name = resa["id"]
                schedule.owner = arg_dict["user"]
                schedule.bin = "autodetected-jobs"
                schedule.state = "ready"
                schedule.action_state = ""
                # Compute the dates
                start_date = datetime.strptime(resa["start_date"], "%Y-%m-%dT%H:%M:%SZ")
                start_date = start_date.replace(tzinfo=pytz.UTC)
                schedule.start_date = start_date.timestamp()
                schedule.end_date = start_date.timestamp() + resa["submitted_duration"] * 60
                db.add(schedule)
                my_sch = schedule
            else:
                # Do not analyze this job
                my_sch = None
        if my_sch is not None:
            # Get the list of the assigned nodes
            nodes = db.query(IotNodes).filter(
                IotNodes.job_id == resa["id"]).first()
            if nodes is None:
                cmd = "iotlab-experiment -u %s -p %s get -i %s -n" % (
                    arg_dict["iot_user"],
                    decrypt_password(arg_dict["iot_password"]), id_str)
                process = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
                                         stderr=subprocess.DEVNULL,
                                         universal_newlines=True)
                node_data = json.loads(process.stdout)["items"]
                assigned_nodes = []
                for n in node_data:
                    name = n["network_address"]
                    assigned_nodes.append(
                        name.split(".")[0] + "@" + name.split(".")[1])
                nodes_str = ",".join(assigned_nodes)
                # Cache the node list in the DB for the next calls
                nodes_db = IotNodes()
                nodes_db.job_id = resa["id"]
                nodes_db.assigned_nodes = nodes_str
                db.add(nodes_db)
            else:
                nodes_str = nodes.assigned_nodes
            # Send job information
            result["nodes"][my_sch.node_name] = {
                "node_name": my_sch.node_name,
                "bin": my_sch.bin,
                "start_date": my_sch.start_date,
                "end_date": my_sch.end_date,
                "state": resa["state"].lower(),
                "assigned_nodes": nodes_str
            }
            result["nodes"][my_sch.node_name]["data_link"] = (
                resa["state"] == "Terminated" or resa["state"] == "Stopped")
    close_session(db)
    return json.dumps(result)
def node_mine(arg_dict):
    """Return the Grid'5000 jobs of one user that are registered in the
    local Schedule table, together with their assigned nodes.

    arg_dict -- must contain 'user' (an email), 'g5k_user' and
                'g5k_password'; otherwise a JSON string describing the
                expected parameters is returned.

    Returns a JSON string {"states": [...], "nodes": {job_uid: {...}}}.
    """
    if "user" not in arg_dict or "@" not in arg_dict["user"] or \
        "g5k_user" not in arg_dict or "g5k_password" not in arg_dict:
        return json.dumps({
            "parameters": {
                "user": "******",
                "g5k_user": "******",
                "g5k_password": "******"
            }
        })
    result = {"states": [], "nodes": {}}
    # Get the list of the states for the 'deploy' process
    # (keep the longest state list among the deploy process variants)
    py_module = import_module("%s.states" % get_config()["node_type"])
    PROCESS = getattr(py_module, "PROCESS")
    for p in PROCESS["deploy"]:
        if len(p["states"]) > len(result["states"]):
            result["states"] = p["states"]
    # Get the existing jobs for this user (jobs still 'configuring' are
    # handled by node_configure(), not here)
    db = open_session()
    schedule = db.query(Schedule).filter(
        Schedule.owner == arg_dict["user"]).filter(
        Schedule.state != "configuring").all()
    db_jobs = {sch.node_name: sch for sch in schedule}
    if len(db_jobs) == 0:
        # Nothing registered locally: skip the g5k API round trip
        close_session(db)
        return json.dumps(result)
    # Connect to the grid5000 API
    g5k_site = g5k_connect(arg_dict)
    user_jobs = g5k_site.jobs.list(state="running", user=arg_dict["g5k_user"])
    user_jobs += g5k_site.jobs.list(state="waiting", user=arg_dict["g5k_user"])
    # Remove from the DB the jobs that no longer exist on the g5k side
    check_deleted_jobs(db_jobs, user_jobs, db)
    for j in user_jobs:
        # refresh() fetches the full job description (assigned_nodes, state)
        j.refresh()
        uid_str = str(j.uid)
        if uid_str in db_jobs:
            my_conf = db_jobs[uid_str]
            result["nodes"][uid_str] = {
                "node_name": uid_str,
                "bin": my_conf.bin,
                "start_date": my_conf.start_date,
                "end_date": my_conf.end_date,
                "state": my_conf.state,
                "job_state": j.state
            }
            # Look up the cached list of assigned nodes
            assigned_nodes = db.query(ActionProperty).filter(
                ActionProperty.node_name == my_conf.node_name).filter(
                ActionProperty.prop_name == "assigned_nodes").first()
            if assigned_nodes is None:
                # Not cached yet: record it as soon as g5k assigns nodes.
                # NOTE(review): when the job has no assigned nodes yet, the
                # 'assigned_nodes' key is absent from the answer -- confirm
                # callers tolerate this
                if len(j.assigned_nodes) > 0:
                    assigned_nodes = ActionProperty()
                    assigned_nodes.owner = arg_dict["user"]
                    assigned_nodes.node_name = my_conf.node_name
                    assigned_nodes.prop_name = "assigned_nodes"
                    assigned_nodes.prop_value = ",".join(j.assigned_nodes)
                    db.add(assigned_nodes)
                    result["nodes"][uid_str][
                        "assigned_nodes"] = assigned_nodes.prop_value
            else:
                result["nodes"][uid_str][
                    "assigned_nodes"] = assigned_nodes.prop_value
    close_session(db)
    return json.dumps(result)
def node_reserve(arg_dict):
    """Reserve Grid'5000 nodes matching a filter for a given time slot.

    arg_dict -- must contain 'filter' (dict), 'user' (email),
                'start_date' (epoch seconds), 'duration' (hours),
                'g5k_user' and 'g5k_password'; otherwise a JSON string
                describing the expected parameters is returned.

    Returns a JSON string {"nodes": [selected node names]} -- the list is
    empty when the job creation failed.
    """
    # Check arguments
    if "filter" not in arg_dict or "user" not in arg_dict or \
        "start_date" not in arg_dict or "duration" not in arg_dict or \
        "g5k_user" not in arg_dict or "g5k_password" not in arg_dict:
        logging.error("Missing parameters: '%s'" % arg_dict)
        return json.dumps({
            "parameters": {
                "user": "******",
                "filter": "{...}",
                "start_date": 1623395254,
                "duration": 3,
                "g5k_password": "******",
                "g5k_user": "******"
            }
        })
    result = {"nodes": []}
    user = arg_dict["user"]
    f = arg_dict["filter"]
    # f = {'nb_nodes': '3', 'model': 'RPI4B8G', 'switch': 'main_switch'}
    nb_nodes = int(f["nb_nodes"])
    del f["nb_nodes"]
    start_date = arg_dict["start_date"]
    end_date = start_date + arg_dict["duration"] * 3600
    # Connect to the grid5000 API
    g5k_site = g5k_connect(arg_dict)
    # Get the node list
    servers = build_server_list(g5k_site)
    filtered_nodes = []
    if "name" in f:
        # Filtering on an explicit server name short-circuits the other props
        if f["name"] in servers:
            filtered_nodes.append(f["name"])
    else:
        # Get the node properties used in the filter
        node_props = {}
        if len(f) == 0:
            # Empty filter: every known server is a candidate
            filtered_nodes += servers.keys()
        else:
            # Keep the servers whose properties all match the filter
            for node in servers.values():
                ok_filtered = True
                for prop in f:
                    if node[prop] != f[prop]:
                        ok_filtered = False
                if ok_filtered:
                    filtered_nodes.append(node["name"])
    # Check the availability of the filtered nodes
    logging.info("Filtered nodes: %s" % filtered_nodes)
    selected_nodes = []
    node_status = {}
    for node_name in filtered_nodes:
        # Node names look like '<cluster>-<number>'; the reservation status
        # is fetched once per cluster and cached in node_status
        cluster_name = node_name.split("-")[0]
        if cluster_name not in node_status:
            node_status[cluster_name] = status_to_reservations(
                g5k_site.clusters[cluster_name].status.list().nodes)
        ok_selected = True
        # Move the start date back 15 minutes to give the time for
        # destroying the previous reservation
        back_date = start_date - 15 * 60
        # Check the schedule of the existing reservations
        if node_name in node_status[cluster_name]:
            for reservation in node_status[cluster_name][node_name]:
                # Only one reservation for a specific node per user
                if reservation["owner"] == user:
                    ok_selected = False
                # There is no reservation at the same date (overlap test
                # between [back_date, end_date] and the reservation slot)
                if (back_date > reservation["start_date"]
                        and back_date < reservation["end_date"]) or (
                        back_date < reservation["start_date"]
                        and end_date > reservation["start_date"]):
                    ok_selected = False
        if ok_selected:
            # Add the node to the reservation
            selected_nodes.append(node_name)
            if len(selected_nodes) == nb_nodes:
                # Exit when the required number of nodes is reached
                break
    logging.info("Selected nodes: %s" % selected_nodes)
    # Set the duration of the job
    walltime = "%s:00" % arg_dict["duration"]
    # Set a 'sleep' command that lasts 30 days, i.e, the maximum duration of this job.
    # This command allows to extend the reservation (see node_extend())
    command = "sleep %d" % (30 * 24 * 3600)
    job_conf = {
        "name": "piseduce %s" % datetime.now(),
        "resources": "nodes=%d,walltime=%s" % (len(selected_nodes), walltime),
        "command": command,
        "types": ["deploy"]
    }
    # Set the 'reservation' property to define the job's start date
    now = int(time.time())
    delta_s = start_date - now
    if delta_s > 5 * 60:
        # Only consider the start_date if this date is after the next 5 minutes
        # NOTE(review): the timezone is hard-coded to Europe/Paris -- confirm
        # this matches the target g5k site
        local_date = datetime.fromtimestamp(start_date).astimezone(
            pytz.timezone("Europe/Paris"))
        job_conf["reservation"] = str(local_date)[:-6]
    if len(selected_nodes) == 1:
        # Reserve the node from its server name
        logging.info("Reservation the node '%s' with the walltime '%s'" %
                     (selected_nodes[0], walltime))
        job_conf["properties"] = "(host in ('%s.%s.grid5000.fr'))" % (
            selected_nodes[0], g5k_site.uid)
    else:
        # Reserve the nodes from cluster names
        clusters = set()
        for node in selected_nodes:
            clusters.add(node.split("-")[0])
        logging.info(
            "Reservation on the clusters '%s' with the walltime '%s'" %
            (clusters, walltime))
        job_conf["properties"] = "(cluster in (%s))" % ",".join(
            ["'%s'" % c for c in clusters])
    try:
        job = g5k_site.jobs.create(job_conf)
        result["nodes"] = selected_nodes
        # Store the g5k login/password to the DB in order to use it with agent_exec.py
        # NOTE(review): the password is stored in clear text in the DB
        db = open_session()
        g5k_cred = ActionProperty()
        g5k_cred.node_name = job.uid
        g5k_cred.owner = arg_dict["user"]
        g5k_cred.prop_name = "g5k"
        g5k_cred.prop_value = "%s/%s" % (arg_dict["g5k_user"],
                                         arg_dict["g5k_password"])
        db.add(g5k_cred)
        close_session(db)
    except:
        # On failure, result["nodes"] stays empty and the error is logged
        logging.exception("Creating job: ")
    return json.dumps(result)
def node_deploy(arg_dict):
    """Record the deployment configuration of the jobs listed in 'nodes'.

    arg_dict -- must contain 'user' (an email) and 'nodes', a dict mapping
                job names to property dicts with at least 'node_bin' and
                'environment' (plus an optional SSH key).

    Returns a JSON string: either the expected-parameters template, or a
    per-node dict with {"state": "ready"} / {"missing": [...]}.
    """
    # Answer returned when the parameters are malformed. Binding it before
    # any check fixes a NameError: the previous version defined it only
    # inside the first 'if' (which returned) but also referenced it in the
    # later isinstance() checks.
    error_msg = {
        "parameters": {
            "user": "******",
            "nodes": {
                "node-3": {
                    "node_bin": "my_bin",
                    "environment": "my-env"
                }
            }
        }
    }
    # Check the parameters
    if "user" not in arg_dict or "@" not in arg_dict[
            "user"] or "nodes" not in arg_dict:
        return json.dumps(error_msg)
    # Check the nodes dictionary: it must map node names to property dicts
    node_prop = arg_dict["nodes"]
    if isinstance(node_prop, dict):
        for val in node_prop.values():
            if not isinstance(val, dict):
                return json.dumps(error_msg)
    else:
        return json.dumps(error_msg)
    result = {}
    # Add the properties to the job configuration
    for node_name in node_prop:
        result[node_name] = {}
        my_prop = node_prop[node_name]
        # Collect the missing mandatory properties
        if "node_bin" not in my_prop or len(my_prop["node_bin"]) == 0:
            result[node_name].setdefault("missing", []).append("node_bin")
        if "environment" not in my_prop or len(my_prop["environment"]) == 0:
            result[node_name].setdefault("missing", []).append("environment")
        if len(result[node_name]) == 0:
            # Remove special characters from the node bin name
            node_bin = safe_string(my_prop["node_bin"])
            # Remove spaces from value
            node_bin = node_bin.replace(" ", "_")
            # Record the job configuration to the database
            db = open_session()
            my_job = db.query(Schedule).filter(
                Schedule.node_name == node_name).first()
            if my_job is None:
                logging.error("job %s not found in the Schedule DB table" % node_name)
            else:
                my_job.bin = node_bin
                my_job.state = "ready"
                env = ActionProperty()
                env.owner = arg_dict["user"]
                env.node_name = my_job.node_name
                env.prop_name = "environment"
                env.prop_value = my_prop["environment"]
                db.add(env)
                ssh_key = ActionProperty()
                ssh_key.owner = arg_dict["user"]
                ssh_key.node_name = my_job.node_name
                ssh_key.prop_name = "ssh_key"
                # The key sent with the form takes precedence over the
                # account key; without either, no ssh_key row is added
                if "form_ssh_key" in my_prop and len(my_prop["form_ssh_key"]) > 0:
                    ssh_key.prop_value = my_prop["form_ssh_key"]
                    db.add(ssh_key)
                elif "account_ssh_key" in my_prop and len(my_prop["account_ssh_key"]) > 0:
                    ssh_key.prop_value = my_prop["account_ssh_key"]
                    db.add(ssh_key)
            close_session(db)
            result[node_name] = {"state": "ready"}
    return json.dumps(result)
def node_configure(arg_dict):
    """Return the configurable properties of the user's g5k jobs that are
    still in the 'configuring' state, registering any unknown g5k job to
    the local Schedule table on the way.

    arg_dict -- must contain 'user' (an email) plus the g5k credentials
                used by g5k_connect(); otherwise a JSON string describing
                the expected parameters is returned.

    Returns a JSON string {job_id: {property: {...}, "start_date": ...,
    "end_date": ...}}.
    """
    if "user" not in arg_dict or "@" not in arg_dict["user"]:
        return json.dumps({"parameters": {"user": "******"}})
    result = {}
    # The list of the g5k environments
    env_names = [
        "centos7-x64-min", "centos8-x64-min", "debian10-x64-base",
        "debian10-x64-big", "debian10-x64-min", "debian10-x64-nfs",
        "debian10-x64-std", "debian10-x64-xen", "debian9-x64-base",
        "debian9-x64-big", "debian9-x64-min", "debian9-x64-nfs",
        "debian9-x64-std", "debian9-x64-xen", "debiantesting-x64-min",
        "ubuntu1804-x64-min", "ubuntu2004-x64-min"
    ]
    # Common properties to every kind of nodes
    conf_prop = {
        "node_bin": {
            "values": [],
            "mandatory": True
        },
        "environment": {
            "values": env_names,
            "mandatory": True
        }
    }
    conf_prop.update(CONFIGURE_PROP)
    # Get the jobs in the schedule by reading the DB
    db = open_session()
    schedule = db.query(Schedule).filter(
        Schedule.owner == arg_dict["user"]).all()
    uids = {sch.node_name: sch for sch in schedule}
    # Connect to the grid5000 API
    g5k_site = g5k_connect(arg_dict)
    # Get the grid5000 jobs for the grid5000 user
    user_jobs = g5k_site.jobs.list(state="running", user=arg_dict["g5k_user"])
    user_jobs += g5k_site.jobs.list(state="waiting", user=arg_dict["g5k_user"])
    # Delete the DB jobs that do not exist anymore on the g5k side
    check_deleted_jobs(uids, user_jobs, db)
    # Add the unregistered grid5000 jobs to the DB
    for j in user_jobs:
        # Wait for the start_date
        # NOTE(review): this polls forever if the job never starts
        while j.started_at == 0:
            j.refresh()
            time.sleep(1)
        job_id = str(j.uid)
        if job_id in uids:
            schedule = uids[job_id]
        else:
            start_date = j.started_at
            end_date = j.started_at + j.walltime
            # Record the job properties to the database
            schedule = Schedule()
            schedule.node_name = str(j.uid)
            schedule.owner = arg_dict["user"]
            schedule.start_date = start_date
            schedule.end_date = end_date
            schedule.state = "configuring"
            schedule.action_state = ""
            db.add(schedule)
        # Send the job information about jobs in the 'configuring' state
        if schedule.state == "configuring":
            # Bug fix: copy conf_prop per job -- the previous version stored
            # the SAME dict for every job, so the per-job start/end dates
            # overwrote each other (and polluted conf_prop itself)
            result[schedule.node_name] = dict(conf_prop)
            result[schedule.node_name]["start_date"] = schedule.start_date
            result[schedule.node_name]["end_date"] = schedule.end_date
    close_session(db)
    return json.dumps(result)
def node_reserve(arg_dict):
    """Select IoT-Lab nodes matching a filter for a given time slot and
    record the selection (IotSelection) to the database.

    arg_dict -- must contain 'filter' (dict), 'user' (email),
                'start_date' (epoch seconds), 'duration' (hours),
                'iot_user' and 'iot_password'; otherwise a JSON string
                describing the expected parameters is returned.

    Returns a JSON string {"nodes": [selected node names]}.
    """
    # Check arguments
    if "filter" not in arg_dict or "user" not in arg_dict or \
        "start_date" not in arg_dict or "duration" not in arg_dict or \
        "iot_user" not in arg_dict or "iot_password" not in arg_dict:
        logging.error("Missing parameters: '%s'" % arg_dict)
        return json.dumps({
            "parameters": {
                "user": "******",
                "filter": "{...}",
                "start_date": 1623395254,
                "duration": 3,
                "iot_password": "******",
                "iot_user": "******"
            }
        })
    result = {"nodes": []}
    user = arg_dict["user"]
    f = arg_dict["filter"]
    # f = {'nb_nodes': '3', 'model': 'RPI4B8G', 'switch': 'main_switch'}
    nb_nodes = int(f["nb_nodes"])
    del f["nb_nodes"]
    start_date = arg_dict["start_date"]
    end_date = start_date + arg_dict["duration"] * 3600
    # Get the node list
    servers = build_server_list()
    filtered_nodes = []
    if "name" in f:
        # Filtering on an explicit node name short-circuits the other props
        if f["name"] in servers:
            filtered_nodes.append(f["name"])
    else:
        # Get the node properties used in the filter
        node_props = {}
        if len(f) == 0:
            # Empty filter: every known node is a candidate
            filtered_nodes += servers.keys()
        else:
            # Keep the nodes whose properties all match the filter
            for node in servers.values():
                ok_filtered = True
                for prop in f:
                    if node[prop] != f[prop]:
                        ok_filtered = False
                if ok_filtered:
                    filtered_nodes.append(node["name"])
    # Check the availability of the filtered nodes
    logging.info("Filtered nodes: %s" % filtered_nodes)
    selected_nodes = []
    node_status = {}
    for node_name in filtered_nodes:
        ok_selected = True
        # Move the start date back 15 minutes to give the time for
        # destroying the previous reservation
        back_date = start_date - 15 * 60
        # Check the running experiments of the IoT-Lab plateform
        # NOTE(review): experiment_to_reservation() is called once per
        # candidate node -- consider hoisting it out of the loop
        for name, reservations in experiment_to_reservation().items():
            if name == node_name:
                for resa in reservations:
                    # Only one reservation for a specific node per user
                    if name == node_name and resa["owner"] == user:
                        ok_selected = False
                    # There is no reservation at the same date (overlap test
                    # between [back_date, end_date] and the reservation slot)
                    if (back_date > resa["start_date"]
                            and back_date < resa["end_date"]) or \
                        (back_date < resa["start_date"]
                            and end_date > resa["start_date"]):
                        ok_selected = False
        if ok_selected:
            # Add the node to the reservation
            selected_nodes.append(node_name)
            if len(selected_nodes) == nb_nodes:
                # Exit when the required number of nodes is reached
                break
    logging.info("Selected nodes: %s" % selected_nodes)
    if len(selected_nodes) > 0:
        # All selected nodes are assumed to share the architecture of the
        # first one (they matched the same filter)
        archi = servers[selected_nodes[0]]["archi"]
        db = open_session()
        if "name" in f:
            # Single explicit node: merge it into an existing selection of
            # the same user/archi/start date when possible
            node_id = selected_nodes[0].split("-")[1]
            selection = db.query(IotSelection).filter(
                IotSelection.owner == user).filter(
                IotSelection.archi == archi).filter(
                IotSelection.start_date == start_date).filter(
                IotSelection.node_ids != "").first()
            if selection is None:
                iot_filter = "%s,%s,%s" % (get_config()["iot_site"],
                                           archi.split(":")[0], node_id)
                iot_selection = IotSelection()
                iot_selection.owner = user
                iot_selection.filter_str = iot_filter
                iot_selection.archi = archi
                iot_selection.node_ids = node_id
                iot_selection.node_nb = ""
                iot_selection.start_date = start_date
                iot_selection.end_date = end_date
                db.add(iot_selection)
            else:
                # Append the node id to the existing selection
                selection.filter_str += "+%s" % node_id
                selection.node_ids += "+%s" % node_id
        else:
            # Property-based selection: record the number of nodes instead
            # of explicit node ids
            iot_filter = "%d,archi=%s+site=%s" % (len(selected_nodes), archi,
                                                  get_config()["iot_site"])
            iot_selection = IotSelection()
            iot_selection.owner = user
            iot_selection.filter_str = iot_filter
            iot_selection.archi = archi
            iot_selection.node_ids = ""
            iot_selection.node_nb = len(selected_nodes)
            iot_selection.start_date = start_date
            iot_selection.end_date = end_date
            db.add(iot_selection)
        # Store the iot-lab login/password to the DB in order to use it with agent_exec.py
        result["nodes"] = selected_nodes
        close_session(db)
    return json.dumps(result)
def check_port(switch_port):
    """Check that the node behind 'switch-port' network-boots correctly.

    Powers the port off, verifies the node stops answering pings, installs a
    TFTP boot folder for it, powers the port back on and waits for the node to
    answer pings again. Returns {'status': 'succeed'} or {'status': 'failed'}.
    """
    cluster_desc = get_cluster_desc()
    parts = switch_port.split('-')
    switch = cluster_desc['switches'][parts[0]]
    port_number = int(parts[1])
    # Do not turn off the pimaster
    if switch['master_port'] == port_number:
        return {'status': 'failed'}
    # Looking for the node on the port
    my_node = None
    for node in cluster_desc['nodes'].values():
        if node['switch'] == switch['name'] and \
                node['port_number'] == port_number:
            my_node = node
    # No node linked to the port
    if my_node is None:
        return {'status': 'failed'}
    # Check the node is not currently used.
    # Bug fix: the query previously filtered on node['name'] -- the leftover
    # loop variable (last node iterated) -- instead of the matched node.
    db_session = open_session()
    states = db_session.query(Deployment).filter(
        Deployment.state != 'destroyed').filter(
            Deployment.node_name == my_node['name']).all()
    close_session(db_session)
    if len(states) > 0:
        return {'status': 'failed'}
    # Create the TFTP boot folder
    tftpboot_template_folder = "/tftpboot/rpiboot_uboot"
    tftpboot_node_folder = "/tftpboot/%s" % my_node["id"]
    if os.path.isdir(tftpboot_node_folder):
        shutil.rmtree(tftpboot_node_folder)
    os.mkdir(tftpboot_node_folder)
    for tftpfile in glob('%s/*' % tftpboot_template_folder):
        dest = tftpfile.replace(tftpboot_template_folder, tftpboot_node_folder)
        if tftpfile.endswith('cmdline.txt'):
            # cmdline.txt is copied (it may be customized per node)
            shutil.copyfile(tftpfile, dest)
        else:
            os.symlink(tftpfile, dest)
    # Turn off the PoE port
    turn_off_port(switch['name'], port_number)
    # A powered-off node must NOT answer pings
    if _ping(my_node['ip']) == 0:
        return {'status': 'failed'}
    # Turn on the PoE port
    turn_on_port(switch['name'], port_number)
    # Try to ping the node: wait for the boot, then up to 6 attempts
    time.sleep(30)
    ret_code = 1
    nb = 0
    while ret_code != 0 and nb < 6:
        ret_code = _ping(my_node['ip'])
        nb += 1
        if ret_code != 0:
            time.sleep(10)
    # Delete tftp directory
    shutil.rmtree(tftpboot_node_folder)
    # Turn off the PoE port
    turn_off_port(switch['name'], port_number)
    if ret_code == 0:
        return {'status': 'succeed'}
    else:
        return {'status': 'failed'}


def _ping(ip):
    """Send one ICMP echo with a 1 s timeout; return the ping exit code
    (0 means the host answered). Uses an argument list instead of a shell
    string so the address is never shell-interpreted."""
    return subprocess.run(
        ['ping', '-c', '1', '-W', '1', ip],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL).returncode
def node_conf(switch_name):
    """Configure a newly detected Raspberry Pi node attached to switch_name.

    Reads 'node_ip', 'port' and 'base_name' from the Flask request JSON,
    fetches the board revision and serial over SSH, registers the node in the
    database and powers off its PoE port. Returns a JSON string with 'errors'
    and 'serial' keys.
    """
    # Maps /proc/cpuinfo 'Revision' values to Raspberry Pi model names
    rev_to_model = {
        "a020d3": "RPI3B+1G",
        "a03111": "RPI4B1G",
        "b03111": "RPI4B2G",
        "b03112": "RPI4B2G",
        "b03114": "RPI4B2G",
        "c03111": "RPI4B4G",
        "c03112": "RPI4B4G",
        "c03114": "RPI4B4G",
        "d03114": "RPI4B8G",
    }
    result = {"errors": [], "serial": ""}
    if "node_ip" not in flask.request.json or "port" not in flask.request.json \
            or "base_name" not in flask.request.json:
        result["errors"].append(
            "Required parameters: 'node_ip', 'base_name', 'port'")
        return json.dumps(result)
    node_ip = flask.request.json["node_ip"]
    node_port = flask.request.json["port"]
    # Node name = base name + last byte of the IP address
    node_name = "%s-%s" % (flask.request.json["base_name"],
                           node_ip.split(".")[-1])
    node_model = ""
    node_serial = ""
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(node_ip, username="******", timeout=1.0)
        (stdin, stdout, stderr) = ssh.exec_command("cat /proc/cpuinfo")
        # Wait for the remote command to complete before reading its output
        stdout.channel.recv_exit_status()
        for line in stdout.readlines():
            output = line.strip()
            if "Revision" in output:
                rev = output.split()[-1]
                node_model = rev_to_model.get(rev, "unknown")
            if "Serial" in output:
                # Keep the last 8 hex digits of the serial number
                node_serial = output.split()[-1][-8:]
                result["serial"] = node_serial
        ssh.close()
        # End of the configuration, turn off the node
        turn_off_port(switch_name, node_port)
        # Write the node information to the database
        if len(node_serial) > 0 and len(node_model) > 0:
            db = open_session()
            # Replace any stale record with the same name
            existing = db.query(RaspNode).filter(
                RaspNode.name == node_name).all()
            for to_del in existing:
                db.delete(to_del)
            new_node = RaspNode()
            new_node.name = node_name
            new_node.ip = node_ip
            new_node.switch = switch_name
            new_node.port_number = node_port
            new_node.model = node_model
            new_node.serial = node_serial
            db.add(new_node)
            close_session(db)
    except (AuthenticationException, SSHException, socket.error):
        # logging.warn is deprecated; use logging.warning
        logging.warning("[node-%s] can not connect via SSH to %s" %
                        (node_port, node_ip))
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # are not swallowed
        logging.exception("[node-%s] node configuration fails" % node_port)
    return json.dumps(result)
def del_admin(user_id):
    """Revoke the admin flag of the user identified by user_id, then redirect
    to the admin page."""
    db_session = open_session()
    admin_user = db_session.query(User).filter_by(id=user_id).first()
    # .first() returns None for an unknown id; previously this raised an
    # unhandled AttributeError on the next line
    if admin_user is not None:
        admin_user.is_admin = 0
    close_session(db_session)
    return flask.redirect(flask.url_for("app.admin"))
def node_deploy(arg_dict):
    """Submit IoT-Lab experiments for the caller's pending node selections.

    Expects 'user' (an email address), 'nodes' (a dict mapping selection names
    to property dicts with at least 'node_bin'), plus 'iot_user' and
    'iot_password'. Returns a JSON string mapping each handled selection to its
    state, or a description of the expected parameters on invalid input.
    """
    # Error payload describing the expected parameters. Defined up front so
    # every validation branch can return it -- it was previously bound only
    # inside the first check, making the later returns raise NameError.
    error_msg = {
        "parameters": {
            "user": "******",
            "nodes": {
                "node-3": {
                    "node_bin": "my_bin",
                    "environment": "my-env"
                }
            }
        }
    }
    # Check the parameters
    if "user" not in arg_dict or "@" not in arg_dict["user"] or \
            "nodes" not in arg_dict:
        return json.dumps(error_msg)
    # Check the nodes dictionnary
    node_prop = arg_dict["nodes"]
    if not isinstance(node_prop, dict):
        return json.dumps(error_msg)
    for val in node_prop.values():
        if not isinstance(val, dict):
            return json.dumps(error_msg)
    result = {}
    # Get the iot selections from the DB
    db = open_session()
    selections = db.query(IotSelection).filter(
        IotSelection.owner == arg_dict["user"]).all()
    for sel in selections:
        sel_name = build_name(sel)
        if sel_name not in node_prop:
            continue
        result[sel_name] = {}
        my_prop = node_prop[sel_name]
        if "node_bin" not in my_prop or len(my_prop["node_bin"]) == 0:
            result[sel_name]["missing"] = ["node_bin"]
            continue
        # Remove special characters and spaces from the node bin name
        node_bin = safe_string(my_prop["node_bin"]).replace(" ", "_")
        iot_list = sel.filter_str
        # Resolve the firmware path: shared folder first, then the user folder.
        # NOTE(review): assumes my_prop always carries 'firmware' and 'profile'
        # keys -- confirm against callers.
        firmware_path = ""
        if len(my_prop["firmware"]) > 0:
            firmware_path = "iot-lab/firmware/%s" % my_prop["firmware"]
            if not os.path.isfile(firmware_path):
                firmware_path = "iot-lab/firmware/%s/%s" % (
                    arg_dict["iot_user"], my_prop["firmware"])
                if not os.path.isfile(firmware_path):
                    firmware_path = ""
        if len(firmware_path) > 0 and len(my_prop["profile"]) > 0:
            iot_list += ",%s,%s" % (firmware_path,
                                    my_prop["profile"].split("-", 1)[1])
        else:
            if len(firmware_path) > 0:
                iot_list += ",%s" % firmware_path
            if len(my_prop["profile"]) > 0:
                iot_list += ",,%s" % my_prop["profile"].split("-", 1)[1]
        # Build the iotlab-experiment command; add '-r start_date' only when
        # the reservation starts more than 5 minutes in the future.
        # NOTE(review): the command line (with the decrypted password) is run
        # through a shell -- consider an argument list without shell=True.
        duration_min = (sel.end_date - sel.start_date) / 60
        if sel.start_date > time.time() + 5 * 60:
            cmd = "iotlab-experiment -u %s -p %s submit -n %s -r %d -d %d -l %s" % (
                arg_dict["iot_user"],
                decrypt_password(arg_dict["iot_password"]), node_bin,
                sel.start_date, duration_min, iot_list)
        else:
            cmd = "iotlab-experiment -u %s -p %s submit -n %s -d %d -l %s" % (
                arg_dict["iot_user"],
                decrypt_password(arg_dict["iot_password"]), node_bin,
                duration_min, iot_list)
        process = subprocess.run(cmd,
                                 shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
        if process.returncode > 0:
            if len(process.stdout) > 0:
                logging.error(process.stdout.split())
                result["error"] = [process.stdout]
            elif len(process.stderr) > 0:
                logging.error(process.stderr.split("\n\t")[-1][:-1])
                result["error"] = [process.stderr.split("\n\t")[-1][:-1]]
        else:
            json_data = json.loads(process.stdout)
            # Delete the iot_selection entry and create the schedule entry
            result[sel_name] = {"state": "ready"}
            schedule = Schedule()
            schedule.node_name = json_data["id"]
            schedule.owner = sel.owner
            schedule.bin = node_bin
            schedule.start_date = sel.start_date
            schedule.end_date = sel.end_date
            schedule.state = "ready"
            schedule.action_state = ""
            db.add(schedule)
            db.delete(sel)
    close_session(db)
    return json.dumps(result)