def main(self):
    """Main client loop: connect to the proxy, register, keep the link
    alive with pings, and dispatch incoming protocol actions.

    Reconnects after a 10-second back-off whenever the connection is
    lost. Runs forever; never returns.
    """
    # Dispatch table: action name -> handler(sock, params).
    actions = {
        'pong': self.on_pong,
        "setup": self.on_setup,
        "registered": self.on_registered,
    }
    while True:
        sock = common.connect_to(self.proxy_addr, self.proxy_port)
        if sock is None:
            time.sleep(10)  # back off before retrying the connection
            continue
        self.logger.info("connected to %s:%d" % (self.proxy_addr, self.proxy_port))
        try:
            common.set_sock_buff_size(sock)
            sock.settimeout(5)
            self.registered = False
            text = ''  # accumulated, not-yet-parsed protocol text
            socks_to_read = [sock]
            send_ping = True
            while True:
                if self.registered:
                    # Ping on every other iteration (roughly every 20s
                    # given the 10s select timeout) to keep the link alive.
                    if send_ping:
                        sock.sendall("ping;")
                        send_ping = False
                    else:
                        send_ping = True
                else:
                    # BUG FIX: was sock.send(), which may transmit only
                    # part of the message; sendall() guarantees delivery
                    # of the whole register command (or raises).
                    sock.sendall("register %s;" % self.my_name)
                readable, writable, exceptional = select.select(
                    socks_to_read, [], [], 10)
                if len(readable):
                    try:
                        data = common.recv(sock, common.BUFFER_SIZE)
                    except socket.error:
                        self.logger.error(traceback.format_exc())
                        break
                    if len(data) == 0:
                        # recv() returning '' means the peer closed the socket.
                        self.logger.warn(
                            'read exception, disconnected by remote')
                        break
                    else:
                        text += data
                        # Consume every complete action currently buffered;
                        # get_action() returns the unparsed remainder.
                        while True:
                            action, params, text = common.get_action(text)
                            if action is None:
                                break
                            # `in` replaces dict.has_key() (removed in Py3).
                            if action in actions:
                                actions[action](sock, params)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; log and fall through to reconnect.
            self.logger.error(traceback.format_exc())
        common.safe_close_socket(sock)
def set_action_group(self, action_group):
    """Wire the replace actions of *action_group* to this bar's widgets."""
    super(ReplaceBar, self).set_action_group(action_group)
    # Little gimmick so a None group does not raise: the holders accept a
    # None first argument and simply return None themselves.
    if action_group is None:
        lookup = lambda name: None
    else:
        lookup = action_group.get_action
    replace_forward = lookup(ACTION_REPLACE_FORWARD)
    replace_all = lookup(ACTION_REPLACE_ALL)
    self.toggle_find = lookup(ACTION_FIND_TOGGLE)
    self.signal_1 = signal_holder(replace_forward, "activate",
                                  self.on_replace_curr)
    self.signal_2 = signal_holder(replace_all, "activate",
                                  self.on_replace_all)
    self.replace_forward_src = subscribe_proxy(replace_forward,
                                               self.btn_replace_forward)
    self.replace_all_src = subscribe_proxy(replace_all,
                                           self.btn_replace_all)
def proc_cmd_connection(self, sck):
    """Read pending data from command socket *sck* and dispatch every
    complete buffered action; remove and close the socket on EOF.
    """
    sck_id = str(sck)  # key into the per-socket receive buffer
    data = common.recv(sck, common.BUFFER_SIZE)
    if len(data):
        text = self.data_of_new_socks[sck_id] + data
        while True:
            action, params, text = common.get_action(text, "\r\n")
            # Persist the unconsumed remainder for the next read.
            self.data_of_new_socks[sck_id] = text
            if action is None:
                break
            # `in` replaces dict.has_key() (removed in Python 3).
            if action in self.actions:
                self.actions[action](sck, params, text)
    else:
        # Empty read -> peer closed the connection.
        self.__remove_new_sock(sck)
        common.safe_close_socket(sck)
def __proc_host_actions(self, actions):
    """Read from the host socket and dispatch buffered actions via the
    *actions* table (name -> handler(params)).

    Clears ``self.running`` when the host disconnects or a socket
    error occurs.
    """
    try:
        data = common.recv(self.base_sck, common.BUFFER_SIZE)
        if len(data):
            text = self.data_from_host + data
            while True:
                action, params, text = common.get_action(text)
                if action is None:
                    break
                # `in` replaces dict.has_key() (removed in Python 3).
                if action in actions:
                    actions[action](params)
            self.data_from_host = text
        else:
            # Connection to the host was closed by the peer.
            self.logger.info(u'与host的连接断开')
            self.running.clear()
    except socket.error:
        # BUG FIX: old `except socket.error, e` comma syntax is a
        # SyntaxError on Python 3 (and `e` was unused anyway).
        self.running.clear()
        self.logger.error(traceback.format_exc())
def _create_toggle_action(self, action_group):
    """Resolve the find/replace actions from *action_group*, store them
    on the instance, and return the replace-toggle action."""
    def lookup(name):
        return get_action(action_group.get_action, name)

    self.toggle_find = lookup(ACTION_FIND_TOGGLE)
    self.replace_forward = lookup(ACTION_REPLACE_FORWARD)
    self.replace_all = lookup(ACTION_REPLACE_ALL)
    return lookup(ACTION_REPLACE_TOGGLE)
def _create_toggle_action(self, action_group):
    """Return the replace-toggle action resolved from *action_group*."""
    # The intermediate lambda of the original is inlined; the single
    # call is equivalent.
    return get_action(action_group.get_action, ACTION_REPLACE_TOGGLE)
def test(block, args, d_args, r_args, d_module, r_module, enc, dec, q=None,
         rank=0):
    """Evaluate the policy (r_module) with the learned dynamics model
    (d_module) for *block* episodes.

    Results are returned directly when ``args.local`` is set, otherwise
    pushed onto the multiprocessing queue *q* as
    ``(successes, episodes, total_reward)``.
    """
    import torch
    from torch.autograd import Variable
    from envs import create_env, reset_env, get_obs
    from common import get_action, log

    # Offset the seed per worker so evaluation never mirrors training.
    seed = args.seed * 9823 + 194885 + rank
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    i = 1
    total_acc, total_reward = [], []
    avg_succ, avg_reward, avg_len = 0, 0, 0
    while len(total_acc) < block:
        reward_sum, succ = 0, 0
        actions = []
        if args.single_env and i > 1:
            reset_env(env, args)
        else:
            env = create_env(args.env_name, framework=args.framework,
                             args=args, eval_flag=True)
        done = False
        step = 0
        # Should the two LSTMs share a hidden state?
        cx_r = Variable(torch.zeros(1, r_args.dim))
        hx_r = Variable(torch.zeros(1, r_args.dim))
        if not args.baseline:
            cx_d = Variable(torch.zeros(1, d_args.dim))
            hx_d = Variable(torch.zeros(1, d_args.dim))
        while step < args.max_episode_length and not done:
            # Encode state (baseline skips the encoder and uses raw obs).
            state = get_obs(env, r_args.framework)
            state = Variable(torch.from_numpy(state).float())
            if not args.baseline:
                z = enc(state)
                z_prime_hat = z.unsqueeze(0)
            else:
                z_prime_hat = state.unsqueeze(0)
            actions = []
            if args.mcts:
                z_prime_hat, actions, (hx_r, cx_r), (hx_d, cx_d), _, _, _ = \
                    mcts(env, z_prime_hat, r_module, d_module, enc,
                         (hx_r, cx_r), (hx_d, cx_d), args,
                         discrete=r_args.discrete, use_env=args.use_env)
            # Roll the policy forward in latent space for the remaining steps.
            for r in range(args.rollout - args.d):
                value, logit, (hx_r, cx_r) = r_module(
                    (z_prime_hat, (hx_r, cx_r)))
                action, entropy, log_prob = get_action(
                    logit, discrete=r_args.discrete)
                actions.append(action)
                if not args.baseline:
                    z_prime_hat, _, (hx_d, cx_d) = d_module(
                        (z_prime_hat, z_prime_hat, action, (hx_d, cx_d)))
            if args.save_figs:
                s_prime_hat = dec(z_prime_hat)
            # Execute the planned actions in the real environment.
            for action in actions[:args.rollout]:
                _, reward, done, _ = env.step(action.data.numpy())
                if args.render:
                    env.render()
                reward_sum += reward
                step += 1
                if done:
                    succ = 1
                    break
        # Running averages with weight 1/i (uniform over episodes so far).
        U = 1. / i
        total_acc.append(succ)
        total_reward.append(reward_sum)
        avg_succ = avg_succ * (1 - U) + succ * U
        avg_reward = avg_reward * (1 - U) + reward_sum * U
        avg_len = avg_len * (1 - U) + (step + 1) * U
        if i % args.log_interval == 0:
            # BUG FIX: the message promises averages but the original passed
            # the last episode's reward_sum and step; use avg_reward/avg_len.
            log("Eval: {:d} episodes, avg succ {:.2f}, avg reward {:.2f}, avg length {:.2f}"
                .format(len(total_acc), avg_succ, avg_reward, avg_len))
        i += 1
    if args.local:
        return (sum(total_acc), len(total_acc), sum(total_reward), avg_len)
    q.put((sum(total_acc), len(total_acc), sum(total_reward)))
def mcts(env, z_hat, r_module, d_module, enc, r_state, d_state, args,
         discrete, use_env=False):
    """Depth-``args.d`` tree search with branching factor ``args.b``.

    Expands candidate action sequences from latent state *z_hat* using the
    policy (r_module) and dynamics model (d_module) — or the real env when
    *use_env* is set — and returns the best state tuple
    ``(z, trajectory, r_state, d_state, values, entropies, logprobs)``.
    """
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from common import get_action
    from envs import get_obs

    (hx_r, cx_r) = r_state
    (hx_d, cx_d) = d_state
    parent_states = [(z_hat, [], (hx_r, cx_r), (hx_d, cx_d), [], [], [])]
    child_states = []
    init_state = get_obs(env, args.framework)
    for i in range(args.d):
        actions = []
        best_val = None
        for z_hat, trajectory, (hx_r, cx_r), (hx_d, cx_d), val, entropies, \
                logprobs in parent_states:
            # Skip parents whose accumulated value falls below the best seen.
            if best_val is None:
                best_val = val
            elif val < best_val:
                continue
            value, logit, (hx_r_prime, cx_r_prime) = r_module(
                (z_hat, (hx_r, cx_r)))
            val.append(value)
            if not discrete:
                # Sample args.b candidate continuous actions.
                for b in range(args.b):
                    action, entropy, log_prob = get_action(
                        logit, discrete=False, v=args.v)
                    actions.append((action, entropy, log_prob))
            else:
                prob = F.softmax(logit)
                # BUG FIX: `[:b]` referenced a stale loop variable from the
                # continuous branch (NameError if this branch runs first);
                # the branching factor is args.b.
                # NOTE(review): argpartition returns plain indices, yet the
                # loop below unpacks (a, e, lp) triples — the discrete path
                # looks broken upstream; confirm against the trainer.
                actions = np.argpartition(prob.data.numpy(), args.b)[:args.b]
            for a, e, lp in actions:
                if not use_env:
                    # Imagine the transition with the dynamics model.
                    z_prime_hat, _, (hx_d_prime, cx_d_prime) = d_module(
                        (z_hat, z_hat, a, (hx_d, cx_d)))
                else:
                    # Replay the trajectory in the real env and encode the
                    # observed successor state.
                    state = get_obs(env, args.framework)
                    for t in trajectory:
                        env.step(t.data.numpy())
                    s_prime, _, _, _ = env.step(a.data.numpy())
                    s_prime = Variable(torch.from_numpy(s_prime).float())
                    z_prime_hat = enc(s_prime).unsqueeze(0)
                    env.reset(state)
                    hx_d_prime, cx_d_prime = hx_d, cx_d
                child_states.append(
                    (z_prime_hat, trajectory + [a],
                     (hx_r_prime, cx_r_prime), (hx_d_prime, cx_d_prime),
                     val, entropies + [e], logprobs + [lp]))
        # BUG FIX: prune() was called with the stale `b`; keep the top
        # args.b children per depth level.
        child_states = prune(child_states, args.b)
        parent_states = child_states
        child_states = []
    # Compute the value of the final state in each trajectory; choose best.
    best_val = sum(parent_states[0][4]).data[0, 0]
    best_ind = 0
    for ind, (z, traj, hr, hd, v, _, _) in enumerate(parent_states):
        vr, _, _ = r_module((z, hr))
        v.append(vr)
        if sum(v).data[0, 0] > best_val:
            best_ind = ind
            best_val = sum(v).data[0, 0]
    return parent_states[best_ind]