async def main():
    """Heartbeat loop: connect to the server, then report device status every 5s.

    Runs until cancelled or until any unexpected exception, at which point the
    socket is disconnected and the loop exits.
    """
    global running_container
    await sio.connect(args.BASE_URL)
    while True:
        try:
            info = getRealtimeDeviceInfo()
            info["ping"] = ping(args.host)
            info["uid"] = uid
            # Number of containers this client currently manages.
            info["running"] = len(running_container)
            if sio.connected:
                # NOTE(review): keys "cpu_ing"/"memory_ing" look like typos but must
                # match whatever getRealtimeDeviceInfo() actually returns — confirm there.
                console_log("CPU: {:d}% MEM: {:.1f}% PING: {:.1f}ms".format(
                    int(info["cpu_ing"]),
                    float(info["memory_ing"]) * 100 / float(info["memory"]),
                    float(info["ping"])))
                await sio.emit('heart', info)
            await sio.sleep(5)  # heartbeat interval (seconds)
        except asyncio.CancelledError:
            print("Got CancelledError")
            break
        except Exception as e:
            # Any other failure: drop the connection and leave the loop so the
            # caller can retry the whole register/connect cycle.
            print("Exception:: ", e)
            await sio.disconnect()
            break
    await sio.wait()
async def docker_run(cmd, container_name, log_file):
    """Start a docker container from *cmd*, forcing it to run as *container_name*.

    Any stale container with the same name is force-removed first.  The
    container status is then polled up to 5 times (0.5s apart).

    Returns:
        True when the container is confirmed running, False otherwise.
    """
    try:
        console_log("镜像启动...", 3)
        # Inject "--name <container_name>" right after the leading "docker run "
        # (11 characters).  The original spliced into a per-character list and
        # joined without a trailing space, which glued the name onto the next
        # argument ("--name container_1-it ...") — fixed here.
        # NOTE(review): assumes cmd starts with "docker run " — confirm with callers.
        cmd = cmd[:11] + "--name {} ".format(container_name) + cmd[11:]
        # Remove any leftover container with the same name before starting.
        await run_cmd_output("docker rm -f {}".format(container_name), log_file)
        await asyncio.sleep(1)
        await run_cmd_output(cmd, log_file)
        # Poll the running state until it comes up or we give up.
        check_cnt = 0
        while check_cnt < 5:
            await asyncio.sleep(0.5)
            res = await __docker_check_running(container_name)
            if res:
                console_log("镜像启动成功", 2)
                return True
            check_cnt += 1
        console_log("镜像启动失败", 1)
        return False
    except Exception as e:
        console_log("镜像启动失败: " + str(e), 1)
        return False
async def main():
    """Tweet-bot main loop: fetch a quote, tweet it if unseen, log, then sleep.

    Posts `number_of_tweets` tweets per day; on connection failure it waits
    `restart_delay` seconds and retries the fetch.
    """
    number_of_tweets = 6      # tweets per 24h; drives the snooze interval below
    restart_delay = 10.0      # seconds to wait before retrying after a network error
    quote_id = -1             # -1 means "fetch a fresh quote"; otherwise retry this id
    while True:
        restart = next_tweet = False
        log_msg = ''
        logger.setLevel(ERROR)
        try:
            # Fetch the quote to tweet (3 presumably is a retry/limit arg — confirm in get_quote).
            quote = await get_quote(quote_id, 3)
            if not database.get(quote=quote['id']):
                quote_id = quote['id']
                try:
                    # Tweet the received quote.
                    tweet = api.update_status(status=f"{quote['quote']}\n\n- {quote['author']} -")
                    if tweet.id:
                        console_log(f'Tweet posted: {tweet.id} -> {truncate(tweet.text, 57)!r}')
                        # Record the tweet in the database so it is not reposted.
                        database.insert(timestamp=time.asctime(), id=tweet.id, quote=quote['id'])
                        logger.setLevel(INFO)
                        log_msg = f'Tweet {tweet.id_str} successful'
                        quote_id = -1
                        next_tweet = True
                    else:
                        log_msg = f"Tweet failed - quote:{quote['id']}"
                except TweepError as tweet_error:
                    # Twitter API errors arrive as JSON; surface code + message.
                    tweet_error = json.loads(tweet_error.response.text)['errors'].pop()
                    console_log(tweet_error)
                    log_msg = f"Tweet failed - {tweet_error['code']} - {tweet_error['message']}"
            else:
                log_msg = f"Quote {quote['id']} already tweeted"
        except requests.exceptions.ConnectionError as conn_error:
            log_msg = 'Connection error -> Failed to fetch quote'
            restart = True
        finally:
            # Always persist the outcome, then decide whether to snooze or restart.
            console_log(f"Logging message to file: {log_msg}")
            await logger.log_message(msg=log_msg)
            if next_tweet:
                snooze_time = (24 / number_of_tweets) * 60 * 60
                console_log(f"Snoozing for {convert_time(snooze_time)} until next tweet 💤")
                await asyncio.sleep(snooze_time)
            if restart:
                print('\nRestarting...')
                await asyncio.sleep(restart_delay)
async def docker_stop(container_name):
    """Force-remove the named container.

    Returns True when `docker rm -f` echoes the container name back (success),
    False on mismatch or any exception.
    """
    try:
        console_log("镜像停止...", 3)
        res = await run_cmd_output_wait(
            "docker rm -f {}".format(container_name))
        # docker rm prints the removed container's name on success.
        # NOTE(review): assumes run_cmd_output_wait returns bytes — confirm.
        if res.decode().strip() == container_name:
            console_log("镜像停止成功", 2)
            return True
        console_log("镜像停止失败", 1)
    except Exception as e:
        console_log("镜像停止失败", 1)
        traceback.print_exc()
    return False
async def docker_download_file(image_url, log_file):
    """Pull the docker image at *image_url*, logging output to *log_file*.

    Returns:
        True when `docker pull` reports success, False otherwise (including
        any exception).  The failure path now returns an explicit False
        instead of falling through with an implicit None (both are falsy,
        so callers are unaffected).
    """
    try:
        console_log("开始下载镜像", 3)
        res = await run_cmd_output("docker pull {}".format(image_url), log_file)
        if res:
            console_log("镜像下载完成", 3)
            return True
        console_log("镜像下载失败", 1)
    except Exception as e:
        console_log("镜像下载失败: " + str(e), 1)
    return False
def register():
    """Register this device with the server and store the credentials.

    On success sets the module globals `jwt_key` and `uid` from the server
    response.  On a non-zero `result` the process exits; on any exception the
    error is logged and the function returns (caller retries).
    """
    global jwt_key, uid
    try:
        console_log("获取设备信息...")
        info = getDeviceInfo()
        console_init(info)
        console_log("设备注册中... (服务器IP: {})".format(args.host))
        response = requests.post(url=args.REGISTER_URL,
                                 headers={'Content-Type': 'application/json'},
                                 data=json.dumps(info))
        # Parse the body once instead of re-parsing it on every access.
        payload = response.json()
        if payload.get("result", "") != 0:
            console_log("注册失败,请重试", 1)
            # exit() raises SystemExit (a BaseException), so the broad
            # `except Exception` below does not swallow it.
            exit()
        else:
            jwt_key = payload.get("value", "")
            uid = payload.get("uid", "")
            console_log("注册成功 (UID: {})".format(uid), 2)
    except Exception as e:
        console_log("注册失败,请重试 [{}]".format(e), 1)
async def stop_task(data):
    """Stop the training task identified by data["mid"] and report status '-1'.

    Both the success and every failure path report the same '-1' status back
    to the server; failures are additionally logged locally.
    """
    global args, uid
    console_log('停止任务:{}'.format(data["name"]), 3)
    mid = data.get("mid", '')

    async def report_status():
        # Success and failure both report '-1' (stopped / not running).
        await sio.emit('task_states_update', {
            'uid': uid,
            'mid': mid,
            'status': '-1'
        })

    try:
        if mid == '':
            raise Exception('缺少ID, 停止任务 {} 失败'.format(data["name"]))
        container_name = running_container.get(mid, '')
        if container_name == '':
            raise Exception('没有找到训练任务: {}'.format(data["name"]))
        # Forget the container first, then attempt to stop it.
        running_container.pop(mid, '')
        if not await docker_stop(container_name):
            raise Exception('训练任务停止失败')
        console_log('训练任务停止成功', 2)
        await report_status()
    except Exception as e:
        console_log(str(e), 1)
        await report_status()
async def new_task(data):
    """Handle a new training-task request: pull its image and start a container.

    Reports status '1' to the server on success and '-2' on any failure
    (missing id, unsupported format, download or start failure).
    """
    global args, running_container, uid
    console_log('接收到新的任务:{}'.format(data["name"]), 3)
    mid = data.get("mid", '')
    try:
        if mid == '':
            raise Exception('缺少ID, 任务 {} 无效'.format(data["name"]))
        if data["format"] != "Docker镜像":
            raise Exception('不支持的任务类型:{}'.format(data["format"]))
        # Pick a deterministic container name and register it globally so the
        # task can be stopped later.
        container_name = "container_" + str(mid)
        running_container[mid] = container_name
        # Pull the image, then start it; short-circuit skips the run on a
        # failed download.
        downloaded = await docker_download_file(data["path"], args.log_file)
        started = downloaded and await docker_run(
            data["cmd"], container_name, args.log_file)
        if not started:
            raise Exception('训练任务启动失败')
        console_log('训练任务启动成功', 2)
        await sio.emit('task_states_update', {
            'uid': uid,
            'mid': mid,
            'status': '1'
        })
    except Exception as e:
        console_log(str(e), 1)
        await sio.emit('task_states_update', {
            'uid': uid,
            'mid': mid,
            'status': '-2'
        })
parser.add_argument('-verbose', '--verbose', dest='verbose', help='verbose', default=1) args = parser.parse_args() # ====================test===================== # if args.data_choose == 0: args.N = 2 args.K = 2 data, labels = get_syn_data(SYN_DIR, 'small_data') trainer = Trainer(args) pred, _ = trainer.train(data[np.newaxis, :, :]) console_log(pred=pred, labels=labels, data=np.vstack((i for i in data)), model_name='HMM-VMF-test') # ====================brain===================== # if args.data_choose == 2: # ========================================================================================== # # group: n_s=30, n_c=40, sf=6 # subj: n_s=3, n_c=176, sf=10 args.N = 5 args.K = 8 func_filenames = get_adhd_data(data_dir=BRAIN_DIR, n_subjects=30) cp = ClusterProcess(model=VmfHMM(args), n_cluster=args.N * args.K, n_components=40, group=True, sub_num=3,
args = parser.parse_args()

# Load the dataset: ground-truth labels in 'z', observations in 'data'.
data = scio.loadmat('./datas/{}.mat'.format(args.data_name))
# `np.int` was deprecated in NumPy 1.20 and removed in 1.24 — use the builtin.
labels = data['z'].reshape(-1).astype(int)
data = data['data']
print('begin training......')
print(
    '========================dataset is {}========================'.format(
        args.data_name))
# Per-dataset hyper-parameters, keyed by dataset name and algorithm category.
T, mix_threshold, algorithm_category, max_iter, dim, max_hy1f1_iter, gamma, z, u, v = DATA_PARAMS[
    args.data_name][args.algorithm_category]
if int(args.load_params) == 1:
    # Override the CLI defaults with the tuned per-dataset values.
    args.T = T
    args.mix_threshold = mix_threshold
    args.max_iter = max_iter
    args.max_hy1f1_iter = max_hy1f1_iter
    args.gamma = gamma
    args.z = z
    args.u = u
    args.v = v
trainer = Trainer(args)
trainer.train(data)
pred = trainer.model.predict(data)
category = np.unique(np.array(pred))
print(category)
console_log(pred, labels=labels, model_name='===========dp-wmm')
parser.add_argument('-sm', '--second_max_iter', dest='second_max_iter',
                    help='second max iteration of variational inference',
                    default=500, type=int)
parser.add_argument('-m', '--max_iter', dest='max_iter',
                    help='max iteration of variational inference',
                    default=10, type=int)
args = parser.parse_args()
# Load the synthetic dataset: ground-truth labels in 'z', observations in 'data'.
data = scio.loadmat('{}/{}.mat'.format(SYNC_DIR, args.data_name))
labels = data['z'].reshape(-1)
data = data['data']
print('begin training......')
print('========================dataset is {}========================'.format(args.data_name))
# Per-dataset tuned hyper-parameters.
K, T, mix_threshold, algorithm_category, max_iter, second_max_iter, threshold, group, dim = DATA_PARAMS[
    args.data_name]
if int(args.load_params) == 1:
    # Override the CLI defaults with the tuned per-dataset values.
    args.K = K
    args.T = T
    args.mix_threshold = mix_threshold
    args.algorithm_category = algorithm_category
    args.second_max_iter = second_max_iter
    args.threshold = threshold
    args.max_iter = max_iter
    args.omega = 0.1
    args.eta = 0.1
trainer = Trainer(args)
trainer.train(data)
pred = trainer.model.predict(data)
category = np.unique(np.array(pred))
console_log(pred, labels=labels, model_name='===========hpy-vmf')
async def connect():
    # socket.io event handler: fired when the connection to the server is established.
    console_log('服务器通信链路已建立', 2)
async def disconnect():
    # socket.io event handler: fired when the server connection drops; the
    # client library retries the connection itself.
    console_log('服务器通信链路中断,尝试重连中...', 1)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Training Client')
    parser.add_argument("--host", default='localhost', help="HOST IP")
    parser.add_argument("-p", "--port", default='8088', help="HOST PORT")
    parser.add_argument("-l", "--log_file", default="client.log",
                        help="Log output file")
    args = parser.parse_args()
    # Derived endpoints used by register() and main().
    args.BASE_URL = "http://" + args.host + ":" + str(args.port)
    args.REGISTER_URL = args.BASE_URL + '/api/device/register'
    cnt = 0  # consecutive-failure counter
    while True:
        try:
            register()
            asyncio.run(main())
        except KeyboardInterrupt:
            exit(1)
        except Exception as e:
            cnt += 1
            if cnt < 10:
                # Back off, log, then retry the whole register/connect cycle.
                time.sleep(10)
                console_log(e, 1)
            else:
                # NOTE(review): after 10 consecutive failures the whole device
                # is rebooted — confirm this is intended for unattended hosts.
                os.system("reboot")
# Apply the tuned hyper-parameters loaded earlier for this dataset.
args.threshold = threshold
args.tau = tau
args.gamma = gamma
args.zeta = zeta
args.u = u
args.v = v
args.gene = True
if args.algorithm_category == 1:
    # NOTE(review): omega/eta appear specific to algorithm category 1 —
    # confirm whether max_iter below was also meant to be conditional.
    args.omega = 0.1
    args.eta = 0.1
args.max_iter = max_iter
args.test_data = datas
trainer = Trainer(args)
# Time the training run.
begin = time.time()
trainer.train(datas)
end = time.time()
pred = trainer.model.predict([data])
print(pred[:500])
c = trainer.model.xi  # learned component parameters (xi)
print("time: {}".format(end - begin))
print(np.unique(np.array(pred)))
category = np.unique(np.array(pred))
console_log(pred,
            data=data,
            mu=c,
            model_name='===========hdp-wmm',
            newJ=len(category))
async def reboot():
    # Server-issued reboot command: clean up containers, then reboot the host.
    console_log('执行重启指令...', 1)
    # NOTE(review): `docker rm` without -f skips running containers — confirm
    # whether force-removal was intended here.
    os.system("docker rm $(docker ps -aq)")
    await sio.sleep(5)
    os.system("reboot")
# Train and evaluate one model per NYU surface-normals file.
files = os.listdir('./datas/nyu/normals/')
for name in files:
    # File names carry the frame index with a letter prefix — strip the
    # letters to recover it (e.g. "normals12.mat" -> 12).
    index = int(re.sub(r'[a-z]', '', name.split('.')[0]))
    data = scio.loadmat(
        './datas/nyu/normals/{}'.format(name))['imgNormals']
    # `labels` holds per-frame ground truth along the last axis (1-based index).
    label = labels[:, :, index - 1:index]
    # presumably rescales image and label by factor 3 — TODO confirm
    # scalar_data semantics.
    data, size = scalar_data(data, 3)
    label, _ = scalar_data(label, 3)
    label = label.reshape(-1)
    datas = split2group(data, group)
    trainer = Trainer(args)
    # Time the training run for this frame.
    begin = time.time()
    trainer.train(datas)
    c = trainer.model.xi  # learned component parameters (xi)
    end = time.time()
    # Predict on the flattened (N, 3) normal vectors of the full frame.
    pred = trainer.model.predict([data.reshape((-1, 3))])
    print(np.unique(np.array(pred)))
    category = np.unique(np.array(pred))
    measure_dict = console_log(pred,
                               data=data.reshape((-1, 3)),
                               labels=label,
                               mu=c,
                               model_name='===========chpy-wmm',
                               newJ=len(category))
    measure_dict['time'] = (end - begin)
    print('time: {}'.format(measure_dict['time']))
# ================================================================================================================ # args.tau = 10 args.gamma = 1 args.omega = 0.2 args.eta = 0.5 args.u = 0.9 args.v = 0.01 args.zeta = 0.01 func_filenames = get_adhd_data(data_dir=BRAIN_DIR, n_subjects=30) cp = ClusterProcess(model=VIModel_PY(args), n_components=30, smoothing_fwhm=12., memory="nilearn_cache", threshold=1., memory_level=2, verbose=10, random_state=0) b = time.time() cp.fit(func_filenames) train_data = cp.train_data pred, container, pro = cp.model.predict_brain(train_data[0:1]) e = time.time() print(e - b) # cp.plot_pro(pro.T, save=False, name='vmf-py', item_file='sub{}'.format(1)) cp.plot_all(pred, save=True, name='vmf-py', item_file='sub{}'.format(1)) ca = np.unique(pred) print(ca) measure_dict = console_log(pred=pred[:12000], data=train_data[0][:12000], model_name='HPY-VMF-brain')