Example #1
import numpy as np
import pandas as pd
import holoviews as hv
from holoviews.streams import Pipe
from streamz import Stream
from streamz.dataframe import DataFrame
import hvplot.streamz  # registers .hvplot on streamz DataFrames


class DistributedOptimizer(DistributedSwarm):  # base class: see Example #3
    def stream_progress(self, state, observation, reward):
        # Append the newest best reward to the streaming score plot.
        example = pd.DataFrame({"reward": [reward]},
                               index=[self.n_iters // self.n_swarms])
        self.stream.emit(example)
        # Render the best solution and its value as text labels.
        msg_obs = "Best solution found:\n {}".format(
            np.round(observation, 2).tolist())
        msg_reward = "Best value found: {:.4f}".format(reward)
        data = [[0, 1, msg_reward], [0, 2, msg_obs]]
        self.frame_pipe.send(pd.DataFrame(data, columns=["x", "y", "label"]))

    def init_plot(self):
        # Text-label panel updated through a holoviews Pipe.
        self.frame_pipe = Pipe(data=[])
        self.frame_dmap = hv.DynamicMap(hv.Labels, streams=[self.frame_pipe])
        self.frame_dmap = self.frame_dmap.opts(
            xlim=(-10, 10),
            ylim=(0.5, 2.5),
            height=200,
            width=500,
            xaxis=None,
            yaxis=None,
            title="Best solution",
        )
        # Streaming dataframe buffering the best reward found so far.
        example = pd.DataFrame({"reward": []})
        self.stream = Stream()
        self.buffer_df = DataFrame(stream=self.stream, example=example)
        self.score_dmap = self.buffer_df.hvplot(y=["reward"]).opts(
            height=200, width=400, title="Best value found")
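This subclass builds on the streaming-plot pattern of the DistributedSwarm base class (Example #3): a holoviews Pipe replaces the full dataset of a DynamicMap on every send(), while a streamz Stream feeds a buffering DataFrame that hvplot renders as a growing curve. A minimal, self-contained sketch of that pattern (assuming holoviews, streamz and hvplot are installed; the toy data here is illustrative only):

import pandas as pd
import holoviews as hv
from holoviews.streams import Pipe
from streamz import Stream
from streamz.dataframe import DataFrame
import hvplot.streamz  # registers .hvplot on streamz DataFrames

hv.extension("bokeh")

# A Pipe pushes a complete new dataset into the DynamicMap on each send().
label_pipe = Pipe(data=[])
label_dmap = hv.DynamicMap(hv.Labels, streams=[label_pipe])
label_pipe.send(pd.DataFrame([[0, 1, "hello"]], columns=["x", "y", "label"]))

# A streamz DataFrame buffers emitted rows; hvplot turns it into a live curve.
stream = Stream()
buffer_df = DataFrame(stream=stream, example=pd.DataFrame({"reward": []}))
score_dmap = buffer_df.hvplot(y=["reward"])
stream.emit(pd.DataFrame({"reward": [1.0]}, index=[0]))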
Example #2
# Imports as in Example #1; the DistributedSwarm base class is in Example #3.
class DistributedLennardJonnes(DistributedSwarm):
    def stream_progress(self, state, observation, reward):
        # Append the newest best reward to the streaming score plot.
        ix = self.n_iters // self.n_swarms
        example = pd.DataFrame({"reward": [reward]}, index=[ix])
        self.stream.emit(example)
        # Annotate the score panel with the current best value.
        msg_reward = "Best value found: {:.4f}".format(reward)
        data = [[ix * 0.5, self.init_reward - 3, msg_reward]]
        self.label_pipe.send(pd.DataFrame(data, columns=["x", "y", "label"]))
        # Push the best configuration as (x, y, z) columns to the 3D scatter.
        if self.best[0] is not None:
            x = self.best[0].reshape(-1, 3)
            d = {
                "x": x[:, 0].copy().tolist(),
                "y": x[:, 1].copy().tolist(),
                "z": x[:, 2].copy().tolist(),
            }
            self.best_pipe.send(d)

    def plot(self):
        return self.best_dmap + self.label_dmap * self.score_dmap

    def init_plot(self):
        hv.extension("plotly")  # Scatter3D needs a 3D-capable backend
        # 3D scatter of the best atomic configuration, fed through a Pipe.
        self.best_pipe = Pipe(data=[])
        self.best_dmap = hv.DynamicMap(hv.Scatter3D, streams=[self.best_pipe])
        self.best_dmap = self.best_dmap.opts(
            xlim=(-2, 2),
            ylim=(-2, 4),
            color="red",
            alpha=0.7,
            xaxis=None,
            yaxis=None,
            title="Best solution",
        )
        # Text-label panel showing the current best value.
        self.label_pipe = Pipe(data=[])
        self.label_dmap = hv.DynamicMap(hv.Labels, streams=[self.label_pipe])
        self.label_dmap = self.label_dmap.opts(
            xaxis=None,
            yaxis=None,
            title="Best solution",
        )
        # Streaming dataframe buffering the best reward found so far.
        example = pd.DataFrame({"reward": []})
        self.stream = Stream()
        self.buffer_df = DataFrame(stream=self.stream, example=example)
        self.score_dmap = self.buffer_df.hvplot(y=["reward"]).opts(
            title="Best value found")
Example #3
import copy
from collections import deque
from typing import Callable

import numpy as np
import pandas as pd
import ray
import holoviews as hv
from holoviews.streams import Pipe
from streamz import Stream
from streamz.dataframe import DataFrame
import hvplot.streamz  # registers .hvplot on streamz DataFrames

# RemoteSwarm and ParamServer are ray actors defined elsewhere in the project.


class DistributedSwarm:
    def __init__(
        self,
        swarm: Callable,
        n_swarms: int,
        n_param_servers: int,
        max_iters_ray: int = 10,
        log_every: int = 100,
        n_comp_add: int = 5,
        minimize: bool = False,
        ps_maxlen: int = 100,
        init_reward: float = None,
        log_reward: bool = False,
    ):
        self.n_swarms = n_swarms
        self.minimize = minimize
        self.log = log_reward
        # Worst possible starting value: +inf when minimizing, -inf otherwise.
        self.init_reward = (init_reward if init_reward is not None else
                            (np.inf if minimize else -np.inf))
        self.log_every = log_every
        # Ray actors that hold and exchange the best walkers found so far.
        self.param_servers = [
            ParamServer.remote(minimize=minimize, maxlen=ps_maxlen)
            for _ in range(n_param_servers)
        ]
        # One remote swarm per worker, each built from a copy of the prototype.
        self.swarms = [
            RemoteSwarm.remote(copy.copy(swarm),
                               int(n_comp_add),
                               minimize=minimize) for _ in range(self.n_swarms)
        ]
        self.max_iters_ray = max_iters_ray
        self.frame_pipe: Pipe = None
        self.stream = None
        self.buffer_df = None
        self.score_dmap = None
        self.frame_dmap = None
        self.init_plot()
        self.n_iters = 0
        self.best = (None, None, None)  # (state, observation, reward)

    def init_plot(self):
        # RGB panel showing the latest game frame, fed through a Pipe.
        self.frame_pipe = Pipe(data=[])
        self.frame_dmap = hv.DynamicMap(hv.RGB, streams=[self.frame_pipe])
        self.frame_dmap = self.frame_dmap.opts(xlim=(-0.5, 0.5),
                                               ylim=(-0.5, 0.5),
                                               xaxis=None,
                                               yaxis=None,
                                               title="Game screen")
        # Streaming dataframe buffering the game score over time.
        example = pd.DataFrame({"reward": []})
        self.stream = Stream()
        self.buffer_df = DataFrame(stream=self.stream, example=example)
        self.score_dmap = self.buffer_df.hvplot(y=["reward"]).opts(
            height=200, width=500, title="Game score")

    def plot(self):
        return self.frame_dmap + self.score_dmap

    def stream_progress(self, state, observation, reward):
        # Append the newest reward to the streaming score plot.
        example = pd.DataFrame({"reward": [reward]},
                               index=[self.n_iters // self.n_swarms])
        self.stream.emit(example)
        # Interpret the flat observation as an Atari RGB frame (210 x 160 x 3).
        obs = observation.reshape((210, 160, 3)).astype(np.uint8)
        self.frame_pipe.send(obs)

    def run_swarm(self):
        self.n_iters = 0
        # Reset every swarm and launch their first iteration.
        best_ids = [s.reset.remote() for s in self.swarms]
        steps = {}
        param_servers = deque([])
        for worker, best in zip(self.swarms, best_ids):
            steps[worker.make_iteration.remote(best)] = worker

        # Seed each parameter server with one of the initial walkers.
        bests = []
        for ps, walker in zip(self.param_servers,
                              list(steps.keys())[:len(self.param_servers)]):
            bests.append(ps.exchange_walker.remote(walker))
            param_servers.append(ps)
        ray.get(bests)

        for i in range(self.max_iters_ray * len(self.swarms)):
            self.n_iters += 1
            # Take the first swarm that finished its iteration.
            ready_bests, _ = ray.wait(list(steps))
            ready_best_id = ready_bests[0]
            worker = steps.pop(ready_best_id)
            ps = param_servers.popleft()

            # Exchange its best walker through a parameter server (round-robin)
            # and schedule the worker's next iteration with the result.
            new_best = ps.exchange_walker.remote(ready_best_id)
            param_servers.append(ps)
            steps[worker.make_iteration.remote(new_best)] = worker

            if i % (self.log_every * len(self.swarms)) == 0:
                id_, _ = ray.wait([param_servers[-1].get_best.remote()])
                (state, best_obs, best_reward) = ray.get(id_)[0]
                if state is not None:
                    self.best = (state, best_obs, float(best_reward))
                    # Clamp values worse than the baseline so the plot scale
                    # stays stable, and optionally log-transform the reward.
                    if ((best_reward > self.init_reward) if self.minimize else
                            (best_reward < self.init_reward)):
                        best_reward = self.init_reward
                    best_reward = np.log(best_reward) if self.log else best_reward
                    self.stream_progress(state, best_obs, best_reward)
                else:
                    print("skipping, not ready")