Example #1
0
class Pantheon:
    """Top-level training coordinator.

    Owns the shared network, the Quill logger, and step bookkeeping,
    and advances the optimizer once per batch of worker returns.
    """

    def __init__(self, config, args):
        self.start = time.time()
        self.tick = 0
        self.nANN = config.NPOP
        self.config = config
        self.args = args

        self.net = Model(config, args)
        self.quill = Quill(config.MODELDIR)
        self.log = defaultdict(list)

        # Property access for its side effect (presumably reports the
        # parameter count) — the value itself is unused.
        self.net.nParams

        self.period = 1

    @property
    def model(self):
        """Expose the underlying network's model/weights."""
        return self.net.model

    def step(self, recvs):
        """Consume one batch of worker returns: log, then optimize.

        Returns the current model for redistribution to workers.
        """
        recvs, logs = list(zip(*recvs))

        # Persist logs through Quill before touching the optimizer
        self.quill.scrawl(logs)
        self.tick += 1

        if self.config.TEST:
            # Evaluation mode: report only, no optimizer step
            self.quill.print()
        else:
            lifetime = self.quill.latest()
            self.net.stepOpt(recvs)
            self.net.checkpoint(lifetime)
            self.net.saver.print()

        return self.model
Example #2
0
class Pantheon(Ascend):
    '''Cluster level Pantheon API demo

    Aggregates gradients across all server level optimizer
    nodes and updates model weights using Adam.

    Also demonstrates logging and snapshotting functionality
    through the Quill and Model libraries, respectively.'''

    def __init__(self, trinity, config, idx):
        '''Initialize a local copy of the model; its weights are
        what the optimizer maintains.'''
        super().__init__(trinity.god, config.NGOD, trinity, config)
        self.config = config

        self.net = Model(projekt.ANN, config)

        # Population-based training is experimental (nothing stable
        # yet); plain gradient descent is the default path.
        if config.POPOPT:
            self.opt = PopulationOptimizer(self.net, config)
        else:
            self.opt = GradientOptimizer(self.net, config)

        if config.LOAD or config.BEST:
            self.net.load(self.opt, config.BEST)

        self.quill = Quill(config.MODELDIR)
        self.log = defaultdict(list)

        self.tick = 0
        # Accessed purely for its side effect; the value is unused
        self.net.nParams

    @runtime
    def step(self):
        '''Broadcast updated weights to server level God optimizer
        nodes, then perform an Adam step once a batch of gradients
        comes back.'''
        recvs = super().step(self.net.weights)

        # Unpack worker returns and merge their logs via Quill
        recvs, logs = list(zip(*recvs))
        logs = BlobLogs.merge(logs)
        self.quill.scrawl(logs)
        self.tick += 1

        self.quill.print()
        if not self.config.TEST:
            lifetime = self.quill.latest()
            self.opt.step(recvs, logs)
            self.net.checkpoint(self.opt, lifetime)
Example #3
0
class Pantheon(trinity.Pantheon):
    '''Cluster level Pantheon API demo

    Aggregates gradients across all server level optimizer
    nodes and updates model weights using Adam.

    Also demonstrates logging and snapshotting functionality
    through the Quill and Model libraries, respectively.'''

    def __init__(self, trinity, config, args):
        '''Initialize a local copy of the model; its weights are
        what the optimizer tracks.'''
        super().__init__(trinity, config, args)
        self.config = config
        self.args = args

        self.net = Model(projekt.ANN, config, args)
        self.quill = Quill(config.MODELDIR)
        self.log = defaultdict(list)

        self.tick = 0
        # Accessed purely for its side effect; the value is unused
        self.net.nParams

    @runtime
    def step(self):
        '''Broadcast updated weights to server level God optimizer
        nodes, then perform an Adam step once a batch of gradients
        comes back.'''
        recvs = super().step(self.net.model)

        # Unpack worker returns; aggregate the update/rollout counters
        recvs, logs, nUpdates, nRollouts = list(zip(*recvs))
        self.quill.scrawl(logs, sum(nUpdates), sum(nRollouts))
        self.tick += 1

        self.quill.print()
        if self.config.TEST:
            # Evaluation mode: report only, no optimizer step
            return
        lifetime = self.quill.latest()
        self.net.stepOpt(recvs)
        self.net.checkpoint(lifetime)
        self.net.saver.print()