def task_device_start():
    """Launch the client program of an existing task on a single device.

    Reads ``mid`` (task id) and ``uid`` (device id) from the JSON request
    body, looks the task up through ``db_func.query_task`` and hands the one
    device to ``launch.launch_client``.

    Returns:
        A JSON response whose ``result`` is 0 on success, -1 when ``mid`` or
        ``uid`` is missing/empty, and -2 when the task is not found, the task
        status is "训练停止" (training stopped), or any exception is raised.
    """
    try:
        mid = request.json.get('mid', '')
        uid = request.json.get('uid', '')
        # Both identifiers are required. Without the uid check an empty
        # string would be passed to launch_client as if it were a device id.
        if mid == '' or uid == '':
            return jsonify({"result": -1, "message": "failed"})

        query_result, qs_count = db_func.query_task({'mid': mid})
        if qs_count == 0:
            return jsonify({"result": -2, "message": "任务未找到"})

        task_item = query_result[0]
        if task_item["status"] == "训练停止":
            return jsonify({"result": -2, "message": "任务已停止"})

        # Parse the program information: index the client entries by their
        # "object" field.
        client_info = task_item["program_info"]["client"]
        client_dict = {c["object"]: c for c in client_info}

        server_ip = current_app.config["SERVER_IP"]
        # `%` binds tighter than `+`, so this is SERVER_PORT + (mid % 65535).
        server_port = int(current_app.config["SERVER_PORT"]) + int(mid) % 65535

        launch.launch_client(
            [uid],
            client_dict,
            server_ip,
            server_port,
            mid,
            task_item["name"],
            task_item["model_type"],
        )
        return jsonify({"result": 0, "message": "success"})
    except Exception as e:
        # Top-level handler boundary: log and report a generic failure.
        traceback.print_exc()
        current_app.logger.error(e)
        return jsonify({"result": -2, "message": "failed"})
def task_query():
    """Return one task together with the basic info of its devices.

    Reads ``mid`` from the query string and looks the task up through
    ``db_func.query_task``. The task's ``devices`` field is a ``|``-separated
    list of device uids; the returned ``devices`` list follows that order and
    holds ``{}`` for any uid that ``db_func.query_device_list`` did not return.

    Returns:
        A JSON response whose ``result`` is 0 on success (with the payload
        under ``value``), -1 when ``mid`` is missing/empty, -2 when no task
        matches, and -3 when any exception is raised.
    """
    try:
        mid = request.args.get('mid', '')
        if mid == '':
            return jsonify({"result": -1, "message": "failed"})

        query_result, count = db_func.query_task({"mid": mid})
        if count == 0:
            return jsonify({"result": -2, "message": "failed"})
        task = query_result[0]

        # Basic device info (no realtime data), kept in the task's own order;
        # devices that are not found stay as {}.
        # Blank entries are dropped first: "".split("|") yields [""] and
        # int("") would raise, failing the whole request for a task that
        # simply has no devices.
        uids = [uid for uid in task["devices"].split("|") if uid]
        devices = [{} for _ in uids]
        uid2index = {int(uid): idx for idx, uid in enumerate(uids)}

        # Skip the lookup entirely when there is nothing to look up.
        query_res = db_func.query_device_list(uids) if uids else []
        for q in query_res:
            devices[uid2index[q["uid"]]] = q

        res = {"task": task, "devices": devices}
        return jsonify({"result": 0, "message": "success", "value": res})
    except Exception as e:
        # Top-level handler boundary: log and report a generic failure.
        traceback.print_exc()
        current_app.logger.error(e)
        return jsonify({"result": -3, "message": "failed"})
def task_list():
    """Return one page of tasks, each annotated with device status counts.

    The page number comes from the ``currentPage`` query parameter
    (default 1). For every task, each uid in its ``|``-separated ``devices``
    field is looked up with ``redis_func.get_device_model_status`` and tallied
    into ``devices_running`` (status 1), ``devices_starting`` (status 0) or
    ``devices_stopped`` (status -1); any other status is not counted.

    Returns:
        A JSON response with ``result`` 0, the task list under ``value`` and
        the total under ``count``; ``result`` is -2 when an exception occurs.
    """
    try:
        current_page = int(request.args.get('currentPage', 1))
        tasks, total = db_func.query_task({}, page=current_page)

        # Stringified device status -> name of the counter it increments.
        counter_for_status = {
            '1': 'devices_running',
            '0': 'devices_starting',
            '-1': 'devices_stopped',
        }

        for task in tasks:
            device_uids = task["devices"].split("|")
            for counter in counter_for_status.values():
                task[counter] = 0
            for device_uid in device_uids:
                status = str(
                    redis_func.get_device_model_status(device_uid, task["mid"])
                )
                counter = counter_for_status.get(status)
                if counter is not None:
                    task[counter] += 1

        return jsonify({
            "result": 0,
            "message": "success",
            "value": tasks,
            "count": total,
        })
    except Exception as e:
        current_app.logger.error(e)
        return jsonify({"result": -2, "message": "failed"})
def stop_client(mid):
    """Call ``ws.stop_task`` for every device listed on task ``mid``.

    Args:
        mid: Id of the task, used as the ``mid`` filter for
            ``db_func.query_task``.

    Raises:
        Exception: If the query reports zero matching tasks.
    """
    tasks, found = db_func.query_task({'mid': mid})
    if found == 0:
        raise Exception("No mid ", mid)

    task = tasks[0]
    # The task stores its device uids as one "|"-separated string.
    for device_uid in task["devices"].split('|'):
        ws.stop_task(task, device_uid)
def task_device_stop():
    """Stop a task on a single device.

    Reads ``mid`` (task id) and ``uid`` (device id) from the JSON request
    body and, when the task exists, calls ``ws.stop_task`` for that device.

    Returns:
        A JSON response whose ``result`` is 0 when ``ws.stop_task`` returned a
        truthy value, -1 when ``mid`` or ``uid`` is missing/empty, -2 when the
        task is not found or the stop call returned a falsy value, and -3 when
        any exception is raised.
    """
    try:
        payload = request.json
        mid = payload.get('mid', '')
        uid = payload.get('uid', '')
        if '' in (mid, uid):
            return jsonify({"result": -1, "message": "failed"})

        tasks, found = db_func.query_task({'mid': mid})
        # Short-circuit: stop_task is only called when a task was found.
        stopped = found > 0 and ws.stop_task(tasks[0], uid)
        if not stopped:
            return jsonify({"result": -2, "message": "failed"})
        return jsonify({"result": 0, "message": "success"})
    except Exception as e:
        current_app.logger.error(e)
        return jsonify({"result": -3, "message": "failed"})
def task_start():
    """Start a task: restart its server, then launch clients on all devices.

    Reads ``mid`` from the JSON request body, looks the task up, calls
    ``launch.restart_server`` and ``launch.launch_client`` for every uid in
    the task's ``|``-separated ``devices`` field, and finally sets the task
    status to "正在训练" (training) with ``end_time`` cleared.

    Returns:
        A JSON response whose ``result`` is 0 on success (with the value
        returned by ``launch.launch_client`` under ``value``), -1 when ``mid``
        is missing/empty, and -2 when the task is not found, is already in
        status "正在训练", or any exception is raised.
    """
    try:
        mid = request.json.get('mid', '')
        if mid == '':
            return jsonify({"result": -1, "message": "failed"})

        tasks, found = db_func.query_task({'mid': mid})
        if found == 0:
            return jsonify({"result": -2, "message": "任务未找到"})

        task = tasks[0]
        if task["status"] == "正在训练":
            return jsonify({"result": -2, "message": "任务已启动"})

        # Parse the program information.
        program_info = task["program_info"]
        server_info = program_info["server"]
        clients_by_object = {}
        for client in program_info["client"]:
            clients_by_object[client["object"]] = client

        # Related checks: the value is unused, but the lookup fails the
        # request (via the except below) when the server entry has no "cmd".
        _server_cmd = server_info["cmd"]

        # Start the server side (PS and Supervisor). In theory one server
        # does both parameter aggregation and accuracy evaluation; they are
        # kept separate here for simplicity.
        server_ip = current_app.config["SERVER_IP"]
        base_port = int(current_app.config["SERVER_PORT"])
        # NOTE(review): the modulo applies to mid only, so the sum can exceed
        # 65535 for a large base port — confirm this is intended.
        server_port = base_port + int(mid) % 65535

        launch.restart_server(mid)
        launched = launch.launch_client(
            task["devices"].split('|'),
            clients_by_object,
            server_ip,
            server_port,
            mid,
            task["name"],
            task["model_type"],
        )

        # Update the task status.
        db_func.update_task(mid, {'status': '正在训练', 'end_time': None})
        return jsonify({"result": 0, "message": "success", "value": launched})
    except Exception as e:
        traceback.print_exc()
        current_app.logger.error(e)
        return jsonify({"result": -2, "message": "failed"})
def task_delete():
    """Delete a stopped task and its server.

    Reads ``mid`` from the JSON request body. The task must exist and have
    status "训练停止" (training stopped); then ``launch.delete_server`` and
    ``db_func.delete_task`` are called for it.

    Returns:
        ``('Error', 404)`` when ``mid`` is missing/empty; otherwise a JSON
        response whose ``result`` is 0 on success, -1 when the task is not
        found, -2 when the task is not stopped, and -3 when any exception is
        raised.
    """
    try:
        mid = request.json.get('mid', '')
        if mid == '':
            return 'Error', 404

        tasks, found = db_func.query_task({'mid': mid})
        if found == 0:
            return jsonify({"result": -1, "message": "未找到任务"})

        task = tasks[0]
        is_stopped = task['status'] == '训练停止'
        if not is_stopped:
            return jsonify({"result": -2, "message": "请先停止训练,再删除任务"})

        # Remove the server container; the client ones were already removed
        # when the task was stopped.
        launch.delete_server(mid)
        db_func.delete_task(mid)
        return jsonify({"result": 0, "message": "success"})
    except Exception as e:
        current_app.logger.error(e)
        return jsonify({"result": -3, "message": "failed"})