import copy
from collections import deque
from typing import Callable

import holoviews as hv
import hvplot.streamz  # noqa: F401  (registers .hvplot on streamz DataFrames)
import numpy as np
import pandas as pd
import ray
from holoviews.streams import Pipe
from streamz import Stream
from streamz.dataframe import DataFrame

# ``DistributedSwarm`` (the shared base class) is defined further below;
# ``RemoteSwarm`` and ``ParamServer`` are the ray actors it wraps.


class DistributedOptimizer(DistributedSwarm):
    def stream_progress(self, state, observation, reward):
        # Append the current best reward to the streaming score plot.
        example = pd.DataFrame({"reward": [reward]},
                               index=[self.n_iters // self.n_swarms])
        self.stream.emit(example)
        # Render the best solution and its value as text labels.
        msg_obs = "Best solution found:\n {}".format(np.round(observation, 2).tolist())
        msg_reward = "Best value found: {:.4f}".format(reward)
        data = [[0, 1, msg_reward], [0, 2, msg_obs]]
        self.frame_pipe.send(pd.DataFrame(data, columns=["x", "y", "label"]))

    def init_plot(self):
        self.frame_pipe = Pipe(data=[])
        self.frame_dmap = hv.DynamicMap(hv.Labels, streams=[self.frame_pipe])
        self.frame_dmap = self.frame_dmap.opts(
            xlim=(-10, 10),
            ylim=(0.5, 2.5),
            height=200,
            width=500,
            xaxis=None,
            yaxis=None,
            title="Best solution",
        )
        example = pd.DataFrame({"reward": []})
        self.stream = Stream()
        self.buffer_df = DataFrame(stream=self.stream, example=example)
        self.score_dmap = self.buffer_df.hvplot(y=["reward"]).opts(
            height=200, width=400, title="Best value found")
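# A minimal, self-contained sketch of the streamz + hvplot plumbing that
# ``init_plot``/``stream_progress`` rely on. The reward value and label text
# below are made up for illustration; display the returned layout in a
# notebook cell to see both plots update live.
def _streaming_dashboard_sketch():
    hv.extension("bokeh")

    # Text labels driven by a Pipe, exactly as in ``init_plot`` above.
    label_pipe = Pipe(data=[])
    labels = hv.DynamicMap(hv.Labels, streams=[label_pipe]).opts(
        xlim=(-10, 10), ylim=(0.5, 2.5), xaxis=None, yaxis=None)

    # A live line plot of rewards backed by a streamz DataFrame.
    stream = Stream()
    buffer_df = DataFrame(stream=stream, example=pd.DataFrame({"reward": []}))
    scores = buffer_df.hvplot(y=["reward"])

    # What the optimization loop does on each logging step:
    stream.emit(pd.DataFrame({"reward": [1.5]}, index=[0]))
    label_pipe.send(pd.DataFrame([[0, 1, "Best value found: 1.5000"]],
                                 columns=["x", "y", "label"]))
    return labels + scores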
class DistributedLennardJonnes(DistributedSwarm):
    def stream_progress(self, state, observation, reward):
        ix = self.n_iters // self.n_swarms
        example = pd.DataFrame({"reward": [reward]}, index=[ix])
        self.stream.emit(example)
        # msg_obs = "Best solution found:\n {}".format(np.round(observation, 2).tolist())
        msg_reward = "Best value found: {:.4f}".format(reward)
        data = [[ix * 0.5, self.init_reward - 3, msg_reward]]
        self.label_pipe.send(pd.DataFrame(data, columns=["x", "y", "label"]))
        if self.best[0] is not None:
            # Reshape the flat state vector into (n_atoms, 3) coordinates.
            x = self.best[0].reshape(-1, 3)
            d = {
                "x": x[:, 0].copy().tolist(),
                "y": x[:, 1].copy().tolist(),
                "z": x[:, 2].copy().tolist(),
            }
            self.best_pipe.send(d)

    def plot(self):
        return self.best_dmap + self.label_dmap * self.score_dmap

    def init_plot(self):
        hv.extension("plotly")
        self.best_pipe = Pipe(data=[])
        self.best_dmap = hv.DynamicMap(hv.Scatter3D, streams=[self.best_pipe])
        self.best_dmap = self.best_dmap.opts(
            xlim=(-2, 2),
            ylim=(-2, 4),
            color="red",
            alpha=0.7,
            # height=600, width=600,
            xaxis=None,
            yaxis=None,
            title="Best solution",
        )
        self.label_pipe = Pipe(data=[])
        self.label_dmap = hv.DynamicMap(hv.Labels, streams=[self.label_pipe])
        self.label_dmap = self.label_dmap.opts(
            # height=200, width=400,
            xaxis=None,
            yaxis=None,
            title="Best solution",
        )
        example = pd.DataFrame({"reward": []})
        self.stream = Stream()
        self.buffer_df = DataFrame(stream=self.stream, example=example)
        self.score_dmap = self.buffer_df.hvplot(y=["reward"]).opts(
            # height=200, width=400,
            title="Best value found")
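# A standalone sketch of the 3D streaming view used above: the plotly backend
# renders an ``hv.Scatter3D`` fed through a Pipe. The random ``flat_state``
# is a stand-in for the swarm's best Lennard-Jones configuration, and assumes
# holoviews with the plotly extension is installed.
def _scatter3d_sketch():
    hv.extension("plotly")
    pipe = Pipe(data=[])
    cloud = hv.DynamicMap(hv.Scatter3D, streams=[pipe]).opts(color="red", alpha=0.7)

    flat_state = np.random.uniform(-2, 2, size=12)  # 4 atoms * 3 coordinates
    x = flat_state.reshape(-1, 3)
    pipe.send({"x": x[:, 0].tolist(), "y": x[:, 1].tolist(), "z": x[:, 2].tolist()})
    return cloud  # display in a notebook cell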
class DistributedSwarm:
    def __init__(
        self,
        swarm: Callable,
        n_swarms: int,
        n_param_servers: int,
        max_iters_ray: int = 10,
        log_every: int = 100,
        n_comp_add: int = 5,
        minimize: bool = False,
        ps_maxlen: int = 100,
        init_reward: float = None,
        log_reward: bool = False,
    ):
        self.n_swarms = n_swarms
        self.minimize = minimize
        self.log = log_reward
        self.init_reward = (init_reward if init_reward is not None
                            else (np.inf if minimize else -np.inf))
        self.log_every = log_every
        self.param_servers = [
            ParamServer.remote(minimize=minimize, maxlen=ps_maxlen)
            for _ in range(n_param_servers)
        ]
        self.swarms = [
            RemoteSwarm.remote(copy.copy(swarm), int(n_comp_add), minimize=minimize)
            for _ in range(self.n_swarms)
        ]
        self.max_iters_ray = max_iters_ray
        self.frame_pipe: Pipe = None
        self.stream = None
        self.buffer_df = None
        self.score_dmap = None
        self.frame_dmap = None
        self.init_plot()
        self.n_iters = 0
        self.best = (None, None, None)

    def init_plot(self):
        self.frame_pipe = Pipe(data=[])
        self.frame_dmap = hv.DynamicMap(hv.RGB, streams=[self.frame_pipe])
        self.frame_dmap = self.frame_dmap.opts(
            xlim=(-0.5, 0.5),
            ylim=(-0.5, 0.5),
            xaxis=None,
            yaxis=None,
            title="Game screen",
        )
        example = pd.DataFrame({"reward": []})
        self.stream = Stream()
        self.buffer_df = DataFrame(stream=self.stream, example=example)
        self.score_dmap = self.buffer_df.hvplot(y=["reward"]).opts(
            height=200, width=500, title="Game score")

    def plot(self):
        return self.frame_dmap + self.score_dmap

    def stream_progress(self, state, observation, reward):
        example = pd.DataFrame({"reward": [reward]},
                               index=[self.n_iters // self.n_swarms])
        self.stream.emit(example)
        # Atari-style observations: reshape the flat array into an RGB frame.
        obs = observation.reshape((210, 160, 3)).astype(np.uint8)
        self.frame_pipe.send(obs)

    def run_swarm(self):
        self.n_iters = 0
        best_ids = [s.reset.remote() for s in self.swarms]
        steps = {}
        param_servers = deque([])
        # Schedule the first iteration of every swarm.
        for worker, best in zip(self.swarms, best_ids):
            steps[worker.make_iteration.remote(best)] = worker
        # Seed each parameter server with one of the pending walkers.
        bests = []
        for ps, walker in zip(self.param_servers,
                              list(steps.keys())[:len(self.param_servers)]):
            bests.append(ps.exchange_walker.remote(walker))
            param_servers.append(ps)
        ray.get(bests)

        for i in range(self.max_iters_ray * len(self.swarms)):
            self.n_iters += 1
            # Take the first swarm that finishes, exchange its best walker with
            # the next parameter server in the rotation, and reschedule it.
            ready_bests, _ = ray.wait(list(steps))
            ready_best_id = ready_bests[0]
            worker = steps.pop(ready_best_id)
            ps = param_servers.popleft()
            new_best = ps.exchange_walker.remote(ready_best_id)
            param_servers.append(ps)
            steps[worker.make_iteration.remote(new_best)] = worker

            if i % (self.log_every * len(self.swarms)) == 0:
                id_, _ = ray.wait([param_servers[-1].get_best.remote()])
                (state, best_obs, best_reward) = ray.get(id_)[0]
                if state is not None:
                    self.best = (state, best_obs, float(best_reward))
                    # Clamp rewards worse than the baseline so the plot scale stays stable.
                    if ((best_reward > self.init_reward) if self.minimize
                            else (best_reward < self.init_reward)):
                        best_reward = self.init_reward
                    best_reward = np.log(best_reward) if self.log else best_reward
                    self.stream_progress(state, best_obs, best_reward)
                else:
                    print("skipping, not ready")
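# A toy reproduction of the ``run_swarm`` scheduling pattern: ``ray.wait``
# picks whichever worker finishes first, its result is exchanged through the
# next server in the rotation, and the worker is rescheduled immediately.
# ``ToyServer`` and ``ToySwarm`` are hypothetical stand-ins for ParamServer
# and RemoteSwarm; only the round-robin pipeline mirrors the class above.
def _round_robin_sketch(n_swarms=4, n_servers=2, iters=20):
    import random

    ray.init(ignore_reinit_error=True)

    @ray.remote
    class ToyServer:
        def __init__(self):
            self.best = -np.inf

        def exchange_walker(self, value):
            # Keep the best value seen so far and hand it back to the caller.
            self.best = max(self.best, value)
            return self.best

        def get_best(self):
            return self.best

    @ray.remote
    class ToySwarm:
        def __init__(self, seed):
            self.rng = random.Random(seed)

        def reset(self):
            return 0.0

        def make_iteration(self, best):
            # Propose a new candidate near the incoming best value.
            return best + self.rng.random()

    swarms = [ToySwarm.remote(i) for i in range(n_swarms)]
    servers = deque([ToyServer.remote() for _ in range(n_servers)])
    steps = {swarm.make_iteration.remote(swarm.reset.remote()): swarm
             for swarm in swarms}
    for _ in range(iters):
        ready, _ = ray.wait(list(steps))
        swarm = steps.pop(ready[0])
        server = servers.popleft()
        new_best = server.exchange_walker.remote(ready[0])
        servers.append(server)
        steps[swarm.make_iteration.remote(new_best)] = swarm
    return ray.get(servers[-1].get_best.remote())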