    def optimizer(self):
        # Assumes: from tensorpack.tfutils import optimizer, gradproc
        opt = get_optimizer(self.optimizer_config)

        # Linear quantizer, stop_grad enabled: map every gradient through self.stop_grad.
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['stop_grad']):
            self.add_stop_grad()
            opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.stop_grad)])
        # Linear quantizer, centralized enabled: run self.centralizing on the variables after each update.
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['centralized']):
            self.add_centralizing_update()
            opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
        # The 'cent' quantizer always adds the centralizing post-update step.
        if self.quantizer_config['name'] == 'cent':
            self.add_centralizing_update()
            opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
        # Cluster quantizer with clustering enabled: map gradients through self.clustering.
        if self.quantizer_config['name'] == 'cluster' and eval(self.load_config['clustering']):
            opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.clustering)])
        # Linear quantizer with pruning: run self.masking on the variables after each update.
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['pruning']):
            self.add_masking_update()
            opt = optimizer.PostProcessOptimizer(opt, self.masking)
        # 'intQ' quantizer with activations below 32 bits: run self.ema after each update.
        if int(self.quantizer_config['BITA']) != 32 and self.quantizer_config['name'] == 'intQ':
            self.add_new_cs_update()
            opt = optimizer.PostProcessOptimizer(opt, self.ema)
        return opt
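
Both optimizer() variants on this page build on tensorpack's gradient-processor machinery. A minimal, self-contained sketch of the stop_grad idea, assuming TF 1.x with tensorpack installed; the variable-name rule below is hypothetical and not taken from the repository's own stop_grad implementation. gradproc.MapGradient passes each gradient (optionally together with its variable) through a function, so selected gradients can be zeroed out or dropped before the update is applied.

import re
import tensorflow as tf
from tensorpack.tfutils import gradproc, optimizer

def stop_selected_grads(grad, var):
    # Hypothetical rule: zero the gradient of every variable whose name contains 'frozen'.
    if re.search('frozen', var.op.name):
        return tf.zeros_like(grad)
    return grad

base_opt = tf.train.AdamOptimizer(1e-3)
# The wrapped optimizer runs every gradient through the mapping function before
# apply_gradients(); returning None from the function would drop the gradient entirely.
opt = optimizer.apply_grad_processors(
    base_opt, [gradproc.MapGradient(stop_selected_grads)])
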
    def optimizer(self):
        opt = get_optimizer(self.optimizer_config)
        '''
        if self.optimizer_config['second'] != None:
            temp = {'name': self.optimizer_config['second']}
            opt2 = get_optimizer(temp)

            choose = tf.get_variable('select_opt', initializer=False, dtype=tf.bool)
            opt = tf.cond(choose, opt2, opt)
        '''
        
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['stop_grad']):
            self.add_stop_grad()
            opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.stop_grad)])
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['centralized']):
            self.add_centralizing_update()
            opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
        if self.quantizer_config['name'] == 'cent':
            self.add_centralizing_update()
            opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
        if self.quantizer_config['name'] == 'cluster' and eval(self.load_config['clustering']):
            opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.clustering)])
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['pruning']):
            self.add_masking_update()
            opt = optimizer.PostProcessOptimizer(opt, self.masking)
        return opt
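
The centralizing, masking and EMA branches above wrap the optimizer in optimizer.PostProcessOptimizer, which runs an extra per-variable operation after every gradient update. A minimal sketch under the same TF 1.x / tensorpack assumptions, with a hypothetical post-processing rule instead of the repository's centralizing function:

import tensorflow as tf
from tensorpack.tfutils import optimizer

def clip_weights_after_update(var):
    # Hypothetical rule: after each step, clamp weight tensors named '*/W' into [-1, 1].
    if var.op.name.endswith('/W'):
        return tf.assign(var, tf.clip_by_value(var, -1.0, 1.0))
    return None  # returning None leaves the variable untouched

base_opt = tf.train.AdamOptimizer(1e-3)
opt = optimizer.PostProcessOptimizer(base_opt, clip_weights_after_update)
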
Example #3
    def optimizer(self):
        opt = tf.train.AdamOptimizer(self.cfg.learning_rate)
        return optimizer.apply_grad_processors(opt, [
            gradproc.MapGradient(
                lambda grad: tf.clip_by_average_norm(grad, 0.3)),
            gradproc.SummaryGradient()
        ])
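
For contrast with the per-tensor clipping used in this example, a small illustrative sketch (the clip values are arbitrary): MapGradient with tf.clip_by_average_norm clips each gradient tensor independently, while gradproc.GlobalNormClip rescales all gradients jointly by their global norm.

import tensorflow as tf
from tensorpack.tfutils import gradproc

# Per-tensor clipping: each gradient's average L2 norm is capped at 0.3.
per_tensor_clip = gradproc.MapGradient(lambda g: tf.clip_by_average_norm(g, 0.3))
# Joint clipping: the whole gradient list is rescaled if its global norm exceeds 5.0.
global_clip = gradproc.GlobalNormClip(5.0)
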
Example #4
    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
        # opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
        opt = tf.train.AdamOptimizer(lr)
        return optimizer.apply_grad_processors(
            opt, [
                # gradproc.GlobalNormClip(2.0),
                gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
                gradproc.SummaryGradient()])
Example #5
    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        # This will also put the summary in tensorboard, stat.json and print in terminal,
        # but this time without moving average
        tf.summary.scalar('lr', lr)
        # opt = tf.train.MomentumOptimizer(lr, 0.9)
        opt = tf.train.AdamOptimizer(lr)

        return optimizer.apply_grad_processors(
            opt, [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
                  gradproc.SummaryGradient()])
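
For context, these optimizer() methods override tensorpack's ModelDesc.optimizer(), which the trainer calls once while building the training op. A minimal sketch assuming a tensorpack >= 0.9 style ModelDesc; the toy model itself is made up and only illustrates where optimizer() fits.

import tensorflow as tf
from tensorpack import ModelDesc
from tensorpack.tfutils import gradproc, optimizer

class ToyModel(ModelDesc):
    def inputs(self):
        return [tf.TensorSpec((None, 28, 28), tf.float32, 'input'),
                tf.TensorSpec((None,), tf.int32, 'label')]

    def build_graph(self, image, label):
        logits = tf.layers.dense(tf.layers.flatten(image), 10)
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits)
        # The returned tensor is the cost that the optimizer below minimizes.
        return tf.reduce_mean(cost, name='total_cost')

    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        opt = tf.train.AdamOptimizer(lr)
        return optimizer.apply_grad_processors(
            opt, [gradproc.GlobalNormClip(5.0), gradproc.SummaryGradient()])
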
Example #6
    def _get_optimizer(self):
        gradprocs = [
            FilterGradientVariables('.*net2.*', verbose=False),
            gradproc.MapGradient(
                lambda grad: tf.clip_by_value(grad, hp.train2.clip_value_min, hp.train2.clip_value_max)),
            gradproc.GlobalNormClip(hp.train2.clip_norm),
            # gradproc.PrintGradient(),
            # gradproc.CheckGradient(),
        ]
        lr = tf.get_variable('learning_rate', initializer=hp.train2.lr, trainable=False)
        opt = tf.train.AdamOptimizer(learning_rate=lr)
        return optimizer.apply_grad_processors(opt, gradprocs)

    def _get_optimizer(self):
        gradprocs = [
            tensorpack_extension.FilterGradientVariables('.*net2.*', verbose=False),
            gradproc.MapGradient(
                lambda grad: tf.clip_by_value(grad, hp.train2.clip_value_min, hp.train2.clip_value_max)),
            gradproc.GlobalNormClip(hp.train2.clip_norm),
            # gradproc.PrintGradient(),
            # gradproc.CheckGradient(),
        ]
        global_step = tf.Variable(0, name='global_step', trainable=False)
        # self.lr = self.learning_rate_decay(global_step, hp.train2.lr)
        # lr = learning_rate_decay(initial_lr=hp.train2.lr, global_step)
        lr = tf.get_variable('learning_rate', initializer=hp.train2.lr, trainable=False)
        opt = tf.train.AdamOptimizer(learning_rate=lr)
        return optimizer.apply_grad_processors(opt, gradprocs)
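
FilterGradientVariables comes from a project-specific tensorpack_extension module, not from tensorpack itself. The sketch below reimplements the same idea on top of tensorpack's GradientProcessor base class (an assumption about its intent, not the project's actual code): keep only the gradients whose variable name matches a regex, so that only that sub-network is trained.

import re
from tensorpack.tfutils.gradproc import GradientProcessor

class FilterGradientVariables(GradientProcessor):
    """Keep only gradients whose variable name matches var_regex."""

    def __init__(self, var_regex='.*', verbose=True):
        super(FilterGradientVariables, self).__init__()
        self._regex = re.compile(var_regex)
        self._verbose = verbose

    def _process(self, grads):
        # grads is a list of (gradient, variable) pairs.
        kept = [(g, v) for g, v in grads if self._regex.match(v.op.name)]
        if self._verbose:
            print('Training {}/{} variables'.format(len(kept), len(grads)))
        return kept
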