def build(self, name='r2d2'):
  """Build the distributed agent topology as a launchpad program."""
  program = lp.Program(name=name)

  # Storage and bookkeeping nodes come first so later nodes can take
  # their handles as dependencies.
  with program.group('replay'):
    replay_handle = program.add_node(lp.ReverbNode(self.replay))

  with program.group('counter'):
    counter_handle = program.add_node(lp.CourierNode(self.counter))

  with program.group('learner'):
    learner_handle = program.add_node(
        lp.CourierNode(self.learner, replay_handle, counter_handle))

  # A cacher sits between the learner and its consumers so they do not all
  # poll the learner's variables directly.
  with program.group('cacher'):
    cacher_handle = program.add_node(
        lp.CacherNode(
            learner_handle, refresh_interval_ms=2000, stale_after_ms=4000))

  with program.group('evaluator'):
    program.add_node(
        lp.CourierNode(self.evaluator, cacher_handle, counter_handle))

  # One exploration epsilon per actor: the geometric ladder
  # 0.4**1 .. 0.4**8, reversed.
  actor_epsilons = np.flip(
      np.logspace(1, 8, self._num_actors, base=0.4), axis=0)
  with program.group('actor'):
    for eps in actor_epsilons:
      program.add_node(
          lp.CourierNode(self.actor, replay_handle, cacher_handle,
                         counter_handle, eps))

  return program
def build(self, name='impala'):
  """Build the distributed agent topology as a launchpad program."""
  program = lp.Program(name=name)

  with program.group('replay'):
    queue_handle = program.add_node(lp.ReverbNode(self.queue))

  with program.group('counter'):
    counter_handle = program.add_node(lp.CourierNode(self.counter))

  with program.group('learner'):
    learner_handle = program.add_node(
        lp.CourierNode(self.learner, queue_handle, counter_handle))

  # The evaluator reads variables straight from the learner.
  with program.group('evaluator'):
    program.add_node(
        lp.CourierNode(self.evaluator, learner_handle, counter_handle))

  # Actors, by contrast, read variables through a cache to reduce load on
  # the learner.
  with program.group('cacher'):
    cacher_handle = program.add_node(
        lp.CacherNode(
            learner_handle, refresh_interval_ms=2000, stale_after_ms=4000))

  with program.group('actor'):
    for _ in range(self._num_actors):
      program.add_node(
          lp.CourierNode(self.actor, queue_handle, cacher_handle,
                         counter_handle))

  return program
def build(self, name: str = "madqn") -> Any:
    """Construct the launchpad graph program for the distributed system.

    Args:
        name (str, optional): system name. Defaults to "madqn".

    Returns:
        Any: graph program for distributed system training.
    """
    program = lp.Program(name=name)

    with program.group("replay"):
        replay = program.add_node(lp.ReverbNode(self.replay))

    with program.group("counter"):
        counter = program.add_node(
            lp.CourierNode(self.counter, self._checkpoint))

    # Only launch a coordinator when an executor-step budget is configured.
    if self._max_executor_steps:
        with program.group("coordinator"):
            _ = program.add_node(lp.CourierNode(self.coordinator, counter))

    with program.group("trainer"):
        trainer = program.add_node(
            lp.CourierNode(self.trainer, replay, counter))

    with program.group("evaluator"):
        # NOTE(review): the trainer handle is passed twice here — confirm
        # the evaluator constructor really expects it in both positions.
        program.add_node(
            lp.CourierNode(self.evaluator, trainer, counter, trainer))

    # Pick the executors' variable sources: the trainer itself, or a pool
    # of caches sitting in front of it.
    if not self._num_caches:
        sources = [trainer]
    else:
        with program.group("cacher"):
            sources = [
                program.add_node(
                    lp.CacherNode(
                        trainer,
                        refresh_interval_ms=2000,
                        stale_after_ms=4000,
                    ))
                for _ in range(self._num_caches)
            ]

    with program.group("executor"):
        # Executors are assigned to variable sources round-robin.
        # NOTE(review): attribute is spelled `_num_exectors` (sic) —
        # presumably a typo for `_num_executors`; kept as-is to match the
        # enclosing class definition.
        for executor_id in range(self._num_exectors):
            program.add_node(
                lp.CourierNode(
                    self.executor,
                    executor_id,
                    replay,
                    sources[executor_id % len(sources)],
                    counter,
                    trainer,
                ))

    return program
def build(self, name='dqn'):
  """Build the distributed agent topology.

  Args:
    name: name given to the launchpad program.

  Returns:
    An `lp.Program` wiring together replay, counter (plus an optional
    coordinator), learner, evaluator, optional learner caches, and a
    fleet of epsilon-greedy actors.
  """
  program = lp.Program(name=name)

  with program.group('replay'):
    replay = program.add_node(lp.ReverbNode(self.replay))

  with program.group('counter'):
    counter = program.add_node(lp.CourierNode(self.counter))
    # Optionally add a coordinator that stops the experiment after a fixed
    # number of actor steps.
    if self._max_actor_steps:
      program.add_node(
          lp.CourierNode(self.coordinator, counter, self._max_actor_steps))

  with program.group('learner'):
    learner = program.add_node(
        lp.CourierNode(self.learner, replay, counter))

  with program.group('evaluator'):
    program.add_node(lp.CourierNode(self.evaluator, learner, counter))

  # Generate an epsilon for each actor: the geometric schedule
  # 0.4**1 .. 0.4**8, reversed.
  epsilons = np.flip(np.logspace(1, 8, self._num_actors, base=0.4), axis=0)

  if not self._num_caches:
    # BUGFIX: with zero caches the original left `sources` empty, so the
    # round-robin index below raised ZeroDivisionError on
    # `actor_id % len(sources)`. Fall back to the learner as a single
    # variable source, matching the dmpo builder in this file.
    sources = [learner]
  else:
    with program.group('cacher'):
      # Create a set of learner caches.
      sources = []
      for _ in range(self._num_caches):
        cacher = program.add_node(
            lp.CacherNode(
                learner, refresh_interval_ms=2000, stale_after_ms=4000))
        sources.append(cacher)

  with program.group('actor'):
    # Add actors which pull round-robin from our variable sources.
    for actor_id, epsilon in enumerate(epsilons):
      source = sources[actor_id % len(sources)]
      program.add_node(
          lp.CourierNode(self.actor, replay, source, counter, epsilon))

  return program
def build(self, name='dmpo'):
  """Build the distributed agent topology as a launchpad program."""
  program = lp.Program(name=name)

  with program.group('replay'):
    replay_handle = program.add_node(lp.ReverbNode(self.replay))

  with program.group('counter'):
    counter_handle = program.add_node(lp.CourierNode(self.counter))
    if self._max_actor_steps:
      # Coordinator terminates the run once the actor-step budget is hit.
      _ = program.add_node(
          lp.CourierNode(self.coordinator, counter_handle,
                         self._max_actor_steps))

  with program.group('learner'):
    learner_handle = program.add_node(
        lp.CourierNode(self.learner, replay_handle, counter_handle))

  with program.group('evaluator'):
    program.add_node(
        lp.CourierNode(self.evaluator, learner_handle, counter_handle))

  # Variable sources for the actors: either a pool of caches in front of
  # the learner, or the learner itself when no caches are requested.
  if self._num_caches:
    with program.group('cacher'):
      variable_sources = [
          program.add_node(
              lp.CacherNode(
                  learner_handle,
                  refresh_interval_ms=2000,
                  stale_after_ms=4000))
          for _ in range(self._num_caches)
      ]
  else:
    variable_sources = [learner_handle]

  with program.group('actor'):
    # Assign actors to variable sources round-robin.
    n_sources = len(variable_sources)
    for actor_id in range(self._num_actors):
      program.add_node(
          lp.CourierNode(self.actor, replay_handle,
                         variable_sources[actor_id % n_sources],
                         counter_handle, actor_id))

  return program