Example #1
class Sword(Ascend):
   '''Client level infrastructure demo

   This environment server runs a subset of the
   agents associated with a single server and
   computes model updates over collected rollouts

   At small scale, each server is collocated with a
   single client on the same CPU core. For larger
   experiments with multiple clients, decorate this
   class with @ray.remote to enable sharding.'''

   def __init__(self, trinity, config, idx):
      '''Initializes a model and relevant utilities

      Args:
         trinity : A Trinity object as shown in __main__
         config  : A Config object as shown in __main__
         idx     : Unused hardware index
      '''
      super().__init__(disciple=None, n=0)
      config        = deepcopy(config)
      device        = config.DEVICE
      self.config   = config

      self.net      = projekt.Policy(config).to(device)
      self.manager  = RolloutManager(config)

   @runtime
   def step(self, packet, weights, backward):
      '''Synchronizes weights from upstream; computes
      agent decisions; computes policy updates.
                                                                              
      Args:                                                                   
         packet   : An IO object specifying observations
         weights  : An optional parameter vector to replace model weights
         backward : (bool) Whether or not a backward pass should be performed

      Returns:                                                                   
         data    : The same IO object populated with action decisions
         grads   : A vector of gradients aggregated across trajectories
         summary : A BlobSummary object logging agent statistics
      '''   
      grads, blobs = None, None

      #Sync model weights; batch obs; compute forward pass
      setParameters(self.net, weights)
      self.manager.collectInputs(packet)
      self.net(packet, self.manager)
  
      #Compute backward pass and logs from full rollouts,
      #discarding any partial trajectories
      if backward and not self.config.TEST:
         rollouts, blobs = self.manager.step()
         optim.backward(rollouts, self.config)
         #self.manager.inputs.clear()
         grads = self.net.grads()

      return packet, grads, blobs
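The class docstring above suggests decorating Sword with @ray.remote for multi-client experiments. A minimal sketch of that wiring, assuming trinity, config, weights, and a per-worker list of packets are built as in __main__; only the Ray calls themselves are standard, everything else here is an assumption:

import ray

ray.init()

#Wrap the class at the call site instead of editing its definition
RemoteSword = ray.remote(Sword)
workers     = [RemoteSword.remote(trinity, config, idx) for idx in range(4)]

#Broadcast the same weights to every shard, then gather results asynchronously
refs    = [w.step.remote(packets[i], weights, backward=True)
           for i, w in enumerate(workers)]
results = ray.get(refs)   #list of (packet, grads, summary) tuples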
Example #2
    def __init__(self, trin, config, args, idx):
        '''Initializes a model and relevant utilities'''
        super().__init__(trin, config, args, idx)
        self.config, self.args = config, args

        self.manager = RolloutManager()

        self.net = projekt.ANN(config).to(self.config.DEVICE)
Example #3
   def __init__(self, trinity, config, idx):
      '''Initializes a model and relevant utilities'''
      super().__init__(None, 0)
      config        = deepcopy(config)
      config.DEVICE = 'cpu:0'

      self.config   = config
      self.ent      = 0

      self.keys = set()

      self.net     = projekt.ANN(config)
      self.manager = RolloutManager()
Example #4
   def __init__(self, trinity, config, idx):
      '''Initializes a model and relevant utilities

      Args:
         trinity : A Trinity object as shown in __main__
         config  : A Config object as shown in __main__
         idx     : Unused hardware index
      '''
      super().__init__(disciple=None, n=0)
      config        = deepcopy(config)
      device        = config.DEVICE
      self.config   = config

      self.net      = projekt.Policy(config).to(device)
      self.manager  = RolloutManager(config)
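The Args above refer to a Config object built in __main__. A purely illustrative stand-in, using only attribute names that appear across these examples (the values are placeholders, not the real defaults):

class Config:
   '''Hypothetical stand-in for the Config object referenced above'''
   DEVICE         = 'cpu:0'   #device string consumed by .to(device)
   TEST           = False     #skip backward passes during evaluation
   POPOPT         = False     #alternate optimization mode flag (placeholder)
   ENTROPY        = 0.01      #entropy bonus weight (placeholder value)
   VAL_WEIGHT     = 0.25      #value loss weight (placeholder value)
   CLIENT_UPDATES = 1024      #client experience threshold before an update
   SYNCUPDATES    = 1024      #per-worker experience collected between syncs
   SYNCBATCH      = 512       #forward-pass batch size on rollout workers
   OPTIMUPDATES   = 4096      #aggregated experience per optimizer pass
   OPTIMBATCH     = 512       #minibatch size for forward/backward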
Example #5
    def __init__(self, trin, config, args, idx):
        '''Initializes a model, env, and relevant utilities'''

        super().__init__(trin, config, args, idx)
        config = deepcopy(config)
        config.DEVICE = 'cpu:0'

        self.config = config
        self.args = args
        self.ent = 0

        self.net = projekt.ANN(config)
        self.obs, _, _, _ = self.env.reset()

        #For the renderer
        self.manager = RolloutManager()
Example #6
class God(trinity.God):
    '''Server level God API demo

    This server level optimizer node aggregates experience
    across all core level rollout worker nodes. It uses the
    aggregated experience to compute gradients.

    This is effectively a lightweight variant of the Rapid
    computation model, with the notable difference that we
    also recompute the forward pass from small observation
    buffers rather than communicating large activation
    tensors.

    This demo builds up the ExperienceBuffer utility,
    which handles rollout batching.'''
    def __init__(self, trin, config, args, idx):
        '''Initializes a model and relevant utilities'''
        super().__init__(trin, config, args, idx)
        self.config, self.args = config, args

        self.manager = RolloutManager()

        self.net = projekt.ANN(config).to(self.config.DEVICE)

    @runtime
    def step(self, recv):
        '''Broadcasts updated weights to the core level
        Sword rollout workers. Runs rollout workers.'''
        self.net.recvUpdate(recv)
        self.rollouts(recv)

        #Send update
        grads = self.net.grads()
        logs, nUpdates, nRollouts = self.manager.reset()
        return grads, logs, nUpdates, nRollouts

    def rollouts(self, recv):
        '''Runs rollout workers while asynchronously
        computing gradients over available experience'''
        self.nRollouts, done = 0, False
        while not done:
            packets = super().distrib(recv)  #async rollout workers
            self.processRollouts()  #intermediate gradient computation
            packets = super().sync(packets)  #sync next batches of experience
            self.manager.recv(packets)

            done = self.manager.nUpdates >= self.config.OPTIMUPDATES
        self.processRollouts()  #Last batch of gradients

    def processRollouts(self):
        '''Runs minibatch forwards/backwards
        over all available experience'''
        for batch in self.manager.batched(self.config.OPTIMBATCH,
                                          forOptim=True):
            rollouts = self.forward(*batch)
            self.backward(rollouts)

    def forward(self, pop, rollouts, data):
        '''Recompute forward pass and assemble rollout objects'''
        keys, _, stims, rawActions, actions, rewards, dones = data
        _, outs, vals = self.net(pop, stims, atnArgs=actions)

        #Unpack outputs
        atnTensor, idxTensor, atnKeyTensor, lenTensor = actions
        lens, lenTensor = lenTensor
        atnOuts = utils.unpack(outs, lenTensor, dim=1)

        #Collect rollouts
        for key, out, atn, val, reward, done in zip(keys, outs, rawActions,
                                                    vals, rewards, dones):

            atnKey, lens, atn = list(
                zip(*[(k, len(e), idx) for k, e, idx in atn]))

            atn = np.array(atn)
            out = utils.unpack(out, lens)

            self.manager.fill(key, (atnKey, atn, out), val, done)

        return rollouts

    def backward(self, rollouts):
        '''Compute backward pass and logs from rollout objects'''
        reward, val, pg, valLoss, entropy = optim.backward(
            rollouts,
            valWeight=0.25,
            entWeight=self.config.ENTROPY,
            device=self.config.DEVICE)
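optim.backward above is the library's own routine; as a rough illustration of the loss it weights (a policy gradient term plus a value term and an entropy bonus, per the valWeight and entWeight arguments), here is a generic PyTorch sketch over one flattened batch. None of the tensor names below come from the codebase.

import torch

def pgBackward(logProbs, entropies, values, returns,
               valWeight=0.25, entWeight=0.01):
    '''Generic policy gradient loss sketch, not the projekt optim.backward

    All arguments are 1D tensors over a flattened batch of timesteps.'''
    advantages = (returns - values).detach()

    pgLoss  = -(logProbs * advantages).mean()        #policy gradient term
    valLoss = 0.5*(returns - values).pow(2).mean()   #value regression term
    entLoss = -entropies.mean()                      #entropy bonus (maximized)

    loss = pgLoss + valWeight*valLoss + entWeight*entLoss
    loss.backward()                                  #populates .grad buffers
    return pgLoss.item(), valLoss.item(), entLoss.item()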
Example #7
class Sword(trinity.Sword):
    '''Core level Sword API demo

    This core level rollout worker node runs
    a copy of the environment and all associated
    agents. Multiple Swords return observations,
    actions, and rewards to each server level
    optimizer node.'''
    def __init__(self, trin, config, args, idx):
        '''Initializes a model, env, and relevant utilities'''

        super().__init__(trin, config, args, idx)
        config = deepcopy(config)
        config.DEVICE = 'cpu:0'

        self.config = config
        self.args = args
        self.ent = 0

        self.net = projekt.ANN(config)
        self.obs, _, _, _ = self.env.reset()

        #For the renderer
        self.manager = RolloutManager()

    @runtime
    def step(self, packet=None):
        '''Synchronizes weights from upstream and
        collects a fixed amount of experience.'''
        self.net.recvUpdate(packet)

        while self.manager.nUpdates < self.config.SYNCUPDATES:
            self.tick()

        return self.manager.send()

    def tick(self):
        '''Steps the agent and environment

        Processes observations, selects actions, and
        steps the environment to obtain new observations.
        Serializes (obs, action, reward) triplets for
        communication to an upstream optimizer node.'''

        #Batch observations and make decisions
        stims = Stimulus.process(self.obs)
        self.manager.collectInputs(self.env, self.obs, stims)

        actions, outs = [], []
        for batch in self.manager.batched(self.config.SYNCBATCH):
            pop, rollouts, batch = batch
            keys, obs, stim, _, _, _, _ = batch

            #Run the policy
            atns, out, _ = self.net(pop, stim, obs=obs)
            actions += atns
            outs += out

        #Step the environment and all agents at once.
        #The environment handles action prioritization etc.
        actions = dict(((o[1].entID, a) for o, a in zip(self.obs, actions)))
        nxtObs, rewards, dones, info = super().step(actions)

        #Update the experience buffer
        #The environment is used to generate serialization keys
        self.manager.collectOutputs(self.env, self.obs, outs, rewards, dones)
        self.obs = nxtObs
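tick() above relies on RolloutManager to serialize (obs, action, reward) triplets until SYNCUPDATES worth of experience is available. A toy stand-in, written only to make that accumulate-then-send cycle concrete; none of this is the real RolloutManager API:

class ToyRolloutBuffer:
    '''Hypothetical stand-in for RolloutManager, illustration only'''
    def __init__(self):
        self.nUpdates = 0
        self.traj     = []

    def collect(self, ob, action, reward, done):
        '''Record one serialized (obs, action, reward, done) step'''
        self.traj.append((ob, action, reward, done))
        self.nUpdates += 1

    def send(self):
        '''Hand the accumulated experience upstream and reset'''
        data, self.traj, self.nUpdates = self.traj, [], 0
        return data

In this sketch, a worker would call collect once per agent per tick and call send once nUpdates crosses the sync threshold, mirroring the SYNCUPDATES check in step() above.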
Example #8
class Sword(Ascend):
   '''Core level Sword API demo

   This core level client node maintains a
   full copy of the model. It runs and computes
   updates for the associated policies of all
   agents.'''

   def __init__(self, trinity, config, idx):
      '''Initializes a model and relevant utilities'''
      super().__init__(None, 0)
      config        = deepcopy(config)
      config.DEVICE = 'cpu:0'

      self.config   = config
      self.ent      = 0

      self.keys = set()

      self.net     = projekt.ANN(config)
      self.manager = RolloutManager()

   @runtime
   def step(self, obs, packet=None):
      '''Synchronizes weights from upstream; computes
      agent decisions; computes policy updates.
      
      A few bug notes:
         1. It appears PyTorch errors in .backward when batching
         data. This is because the graph is retained over all
         trajectories in the batch, even though only some are
         finished.
         
         2. Currently specifying retain_graph. This should not be
         required with batch size 1, even with the above bug.
      '''
      #Sync weights    
      self.net.recvUpdate(packet)

      config  = self.config
      actions = {}

      #Batch observations
      self.manager.collectInputs(obs)

      #Compute forward pass
      for pop, batch in self.manager.batched():
         keys, stim, atns = batch

         #Run the policy
         atns, atnsIdx, vals = self.net(pop, stim, atns)

         #Clear .backward buffers during test
         if self.config.TEST or self.config.POPOPT:
            #atns are detached in torch/io/action
            atnsIdx = atnsIdx.detach()
            vals    = vals.detach()

         #Collect output actions and values for .backward
         for key, atn, atnIdx, val in zip(keys, atns, atnsIdx, vals):
            out = Output(key, atn, atnIdx, val)
            actions.update(out.action)
            self.manager.collectOutputs([out])
         
      #Compute backward pass and logs from rollout objects
      if self.manager.nUpdates >= config.CLIENT_UPDATES:
         rollouts, logs = self.manager.step()

         if config.TEST or config.POPOPT:
            return actions, None, logs

         optim.backward(rollouts, valWeight=config.VAL_WEIGHT,
            entWeight=config.ENTROPY, device=config.DEVICE)
         grads = self.net.grads()
         return actions, grads, logs

      return actions, None, None
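The bug notes in step() above mention retain_graph. A self-contained PyTorch snippet showing the behavior they describe: when two losses share parts of one computation graph, the second .backward() fails unless the first call retains the graph. Toy tensors only, unrelated to the codebase.

import torch

net = torch.nn.Linear(4, 1)
x   = torch.randn(8, 4)
out = net(x)                        #one shared forward pass

lossA = out[:4].sum()               #loss over 'finished' trajectories
lossB = out[4:].sum()               #loss over the rest, same graph

lossA.backward(retain_graph=True)   #keep saved buffers for the second call
lossB.backward()                    #raises RuntimeError without retain_graph
                                    #on the first backward above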