Example #1
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(
         opt, [
             gradproc.ScaleGradient(('STN.*', 0.1)),
             gradproc.SummaryGradient()])
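Most of these snippets appear to be `optimizer()` / `_get_optimizer()` overrides on a tensorpack `ModelDesc`, and they call `optimizer.apply_grad_processors` and the `gradproc` processors without showing their imports. A minimal sketch of the header they assume, based on the explicit imports in Example #24 (exact module paths may differ across tensorpack versions):

 # Imports assumed by the examples in this listing (Example #24 shows them
 # explicitly); `symbolic_functions.get_scalar_var` only exists in older
 # tensorpack releases.
 import tensorflow as tf
 from tensorpack.tfutils import optimizer
 from tensorpack.tfutils import gradproc
 # import tensorpack.tfutils.symbolic_functions as symbf  # older examples only

`apply_grad_processors` wraps the given optimizer so that each processor in the list is applied to the (gradient, variable) pairs, in order, before the weights are updated.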
Example #2
 def optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=self.learning_rate,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(
         opt, [gradproc.GlobalNormClip(10),
               gradproc.SummaryGradient()])
Example #3
 def optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=3e-5,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(opt, [
         gradproc.ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])
     ])
Example #4
    def optimizer(self):
        opt = get_optimizer(self.optimizer_config)

        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['stop_grad']):
            self.add_stop_grad()
            opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.stop_grad)])
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['centralized']):
            self.add_centralizing_update()
            opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
        if self.quantizer_config['name'] == 'cent':
            self.add_centralizing_update()
            opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
        if self.quantizer_config['name'] == 'cluster' and eval(self.load_config['clustering']):
            opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.clustering)])
        if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['pruning']):
            self.add_masking_update()
            opt = optimizer.PostProcessOptimizer(opt, self.masking)
        return opt
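Example #4 mixes the two tensorpack wrappers used throughout this listing: `optimizer.apply_grad_processors` rewrites the gradients (here through `gradproc.MapGradient`) before they are applied, while `optimizer.PostProcessOptimizer` runs an extra per-variable op after each update step, which is how the centralizing and masking updates are attached.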
Example #5
 def optimizer(self):
     lr = tf.get_variable("learning_rate",
                          initializer=0.0002,
                          trainable=False)
     tf.summary.scalar("learning_rate", lr)
     opt = tf.train.AdamOptimizer(lr, beta1=0.5, beta2=0.999)
     return optimizer.apply_grad_processors(
         opt, [gradproc.SummaryGradient(),
               gradproc.CheckGradient()])
Example #6
 def optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=5e-4,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(opt, [
         gradproc.ScaleGradient(('STN.*', 0.1)),
         gradproc.SummaryGradient()
     ])
Example #7
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
     # opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     opt = tf.train.AdamOptimizer(lr)
     return optimizer.apply_grad_processors(
         opt, [
             # gradproc.GlobalNormClip(2.0),
             gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
             gradproc.SummaryGradient()])
Example #8
 def _get_optimizer(self):
     conf = Config()
     lr = tf.get_variable('learning_rate',
                          initializer=conf.learning_rate,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr)
     tf.summary.scalar('learning_rate', lr)
     return optimizer.apply_grad_processors(
         opt, [gradproc.GlobalNormClip(conf.max_grad_norm)])
Example #9
 def _get_optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=2e-4,
                          dtype=tf.float32,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
     # generator learns 5 times faster
     return optimizer.apply_grad_processors(
         opt, [gradproc.ScaleGradient(('gen/.*', 5))])
Example #10
 def _get_optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
     tf.summary.scalar('lr', lr)
     opt = tf.train.AdamOptimizer(lr)
     if self.stage == 1:
         return opt
     else:
         # Only apply Global Norm Clipping to gradients of ConvLSTM
         return apply_grad_processors(opt, [FilteredGlobalNormClip(3, 'ConvLSTM')])
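`FilteredGlobalNormClip` in Example #10 is not part of tensorpack's `gradproc` module; it is a project-specific processor. Below is a hypothetical sketch of how such a processor could be written, assuming only the real `gradproc.GradientProcessor` subclassing hook (`_process`) and `tf.clip_by_global_norm`; the class name, constructor arguments, and name-matching rule are inferred from the call `FilteredGlobalNormClip(3, 'ConvLSTM')` above.

 import re
 import tensorflow as tf
 from tensorpack.tfutils import gradproc

 class FilteredGlobalNormClip(gradproc.GradientProcessor):
     """Hypothetical: clip by global norm only the gradients of variables
     whose name matches `regex`, and leave all other gradients untouched."""
     def __init__(self, clip_norm, regex):
         super(FilteredGlobalNormClip, self).__init__()
         self._clip_norm = float(clip_norm)
         self._regex = re.compile(regex)

     def _process(self, grads):
         # grads is a list of (gradient, variable) pairs
         matched = [(g, v) for g, v in grads if self._regex.search(v.op.name)]
         rest = [(g, v) for g, v in grads if not self._regex.search(v.op.name)]
         if not matched:
             return grads
         clipped, _ = tf.clip_by_global_norm(
             [g for g, _ in matched], self._clip_norm)
         return list(zip(clipped, [v for _, v in matched])) + rest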
Example #11
 def optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=1e-3,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr)
     # gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
     #              SummaryGradient()]
     gradprocs = [MapGradient(lambda grad: grad), SummaryGradient()]
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #12
 def optimizer(self):
     lr = tf.get_variable('a3c_learning_rate',
                          initializer=1e-4,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr)
     gradprocs = [
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.3))
     ]
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #13
 def optimizer(self):
     lr = tf.train.exponential_decay(self.base_lr,
                                     global_step=get_global_step_var(),
                                     decay_steps=self.decay_steps,
                                     decay_rate=self.decay_rate,
                                     name='learning-rate')
     opt = tf.train.RMSPropOptimizer(learning_rate=lr)
     tf.summary.scalar('lr', lr)
     return optimizer.apply_grad_processors(opt,
                                            [gradproc.SummaryGradient()])
Example #14
 def optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=1e-3,
                          trainable=False)
     tf.summary.scalar("learning_rate", lr)
     opt = tf.train.RMSPropOptimizer(lr,
                                     decay=0.95,
                                     momentum=0.95,
                                     epsilon=1e-2)
     return optimizer.apply_grad_processors(opt,
                                            [gradproc.SummaryGradient()])
Example #15
    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        # This will also put the summary in tensorboard, stat.json and print in terminal,
        # but this time without moving average
        tf.summary.scalar('lr', lr)
        # opt = tf.train.MomentumOptimizer(lr, 0.9)
        opt = tf.train.AdamOptimizer(lr)

        return optimizer.apply_grad_processors(
            opt, [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
                  gradproc.SummaryGradient()])
Example #16
 def _get_optimizer(self):
     gradprocs = [
         FilterGradientVariables('.*net2.*', verbose=False),
         gradproc.MapGradient(
             lambda grad: tf.clip_by_value(grad, hp.train2.clip_value_min, hp.train2.clip_value_max)),
         gradproc.GlobalNormClip(hp.train2.clip_norm),
         # gradproc.PrintGradient(),
         # gradproc.CheckGradient(),
     ]
     lr = tf.get_variable('learning_rate', initializer=hp.train2.lr, trainable=False)
     opt = tf.train.AdamOptimizer(learning_rate=lr)
     return optimizer.apply_grad_processors(opt, gradprocs)
Example #17
 def optimizer(self):
     lr = tf.get_variable('learning_rate',
                          initializer=args.base_lr,
                          trainable=False)
     opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
     add_moving_summary(lr)
     if args.load:
         gradprocs = [
             gradproc.ScaleGradient([('conv.*', 0.1), ('fc.*', 0.1)])
         ]
         return optimizer.apply_grad_processors(opt, gradprocs)
     else:
         return opt
Example #18
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)

        factor = get_batch_factor()
        if factor != 1:
            lr = lr / float(factor)
            opt = tf.train.MomentumOptimizer(lr, 0.9)
            opt = optimizer.AccumGradOptimizer(opt, factor)
        else:
            opt = tf.train.MomentumOptimizer(lr, 0.9)
        return opt
        # Unreachable as written (dead code after `return opt`); it would
        # additionally scale bias gradients by 2:
        # return optimizer.apply_grad_processors(
        #     opt, [gradproc.ScaleGradient(('.*/b', 2))])
Example #19
 def _get_opt(name, init_lr):
     lr = symbf.get_scalar_var('learning_rate/'+name, init_lr, summary=True)
     opt = tf.train.AdamOptimizer(lr)
     logger.info("create opt {}".format(name))
     gradprocs = [
         # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1), regex='^actor/.*'),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05), regex='^critic/.*'),
         # GlobalNormClip(40.),
         SummaryGradient(),
     ]
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #20
 def _get_optimizer(self):
     gradprocs = [
         tensorpack_extension.FilterGradientVariables('.*net2.*', verbose=False),
         gradproc.MapGradient(
             lambda grad: tf.clip_by_value(grad, hp.train2.clip_value_min, hp.train2.clip_value_max)),
         gradproc.GlobalNormClip(hp.train2.clip_norm),
         # gradproc.PrintGradient(),
         # gradproc.CheckGradient(),
     ]
     global_step = tf.Variable(0, name='global_step', trainable=False)
     #self.lr = self.learning_rate_decay(global_step, hp.train2.lr)
     #lr = learning_rate_decay(initial_lr = hp.train2.lr, global_step)
     lr = tf.get_variable('learning_rate', initializer=hp.train2.lr, trainable=False)
     opt = tf.train.AdamOptimizer(learning_rate=lr)
     return optimizer.apply_grad_processors(opt, gradprocs)
Example #21
 def _get_optimizer(self):
     lr = get_scalar_var('learning_rate', cfg.base_lr, summary=True)
     opt = tf.train.MomentumOptimizer(learning_rate=lr,
                                      momentum=cfg.momentum)
     gradprocs = [
         gradproc.ScaleGradient([('conv.*/W', 1),
                                 ('conv.*/b', cfg.bias_lr_mult),
                                 ('bottleneck.*/W', 1),
                                 ('bottleneck.*/b', cfg.bias_lr_mult),
                                 ('stage_1.*/W', 1),
                                 ('stage_1.*/b', cfg.bias_lr_mult),
                                 ('stage_[2-6].*/W', cfg.lr_mult),
                                 ('stage_[2-6].*/b',
                                  cfg.lr_mult * cfg.bias_lr_mult)])
     ]
     return optimizer.apply_grad_processors(opt, gradprocs)
Example #22
    def _get_optimizer(self):
        lr = tf.get_variable('learning_rate',
                             initializer=0.003,
                             trainable=False)
        tf.summary.scalar('learning_rate', lr)

        factor = get_batch_factor()
        if factor != 1:
            lr = lr / float(factor)
            opt = tf.train.MomentumOptimizer(lr, 0.9)
            opt = optimizer.AccumGradOptimizer(opt, factor)
        else:
            opt = tf.train.MomentumOptimizer(lr, 0.9)
        return opt
        # Unreachable as written (dead code after `return opt`); it would
        # additionally scale bias gradients by 2:
        # return optimizer.apply_grad_processors(
        #     opt, [gradproc.ScaleGradient(('.*/b', 2))])
Example #23
 def _get_opt(name, init_lr):
     lr = symbf.get_scalar_var('learning_rate/' + name,
                               init_lr,
                               summary=True)
     opt = tf.train.AdamOptimizer(lr)
     logger.info("create opt {}".format(name))
     gradprocs = [
         # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                     regex='^actor/.*'),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                     regex='^critic/.*'),
         # GlobalNormClip(40.),
         SummaryGradient(),
     ]
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #24
 def _get_optimizer(self, name):
     from tensorpack.tfutils import optimizer
     from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip, MapGradient
     init_lr = INIT_LEARNING_RATE_A if name == 'actor' else INIT_LEARNING_RATE_C
     import tensorpack.tfutils.symbolic_functions as symbf
     lr = symbf.get_scalar_var('learning_rate/' + name,
                               init_lr,
                               summary=True)
     opt = tf.train.AdamOptimizer(lr)
     logger.info("create opt {}".format(name))
     gradprocs = [
         # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                     regex='^actor/.*'),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                     regex='^critic/.*'),
         # GlobalNormClip(40.),
         SummaryGradient(),
     ]
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #25
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=2e-4, dtype=tf.float32, trainable=False)
     opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
     # generator learns 5 times faster
     return optimizer.apply_grad_processors(
         opt, [gradproc.ScaleGradient(('gen/.*', 5))])
Example #26
 def _get_optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(
         opt, [gradproc.GlobalNormClip(10), gradproc.SummaryGradient()])
Example #27
 def _get_optimizer(self):
     lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
     opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
     # generator learns 5 times faster
     return optimizer.apply_grad_processors(
         opt, [gradproc.ScaleGradient(('gen/.*', 5))])
Example #28
 def optimizer(self):
     lr = tf.get_variable("learning_rate",
                          initializer=self.lr,
                          trainable=False)
     opt = tf.train.AdamOptimizer(lr)
     return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
Example #29
 def _get_optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
     opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
     gradprocs = [gradproc.ScaleGradient(
         [('conv0.*', 0.1), ('group[0-2].*', 0.1)])]
     return optimizer.apply_grad_processors(opt, gradprocs)
Example #30
 def _get_optimizer(self):
     lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)
     opt = tf.train.MomentumOptimizer(lr, 0.9)
     return optimizer.apply_grad_processors(
         opt, [gradproc.ScaleGradient(('.*/b', 2))])
Example #31
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=1.0, trainable=False)
     opt = tf.train.GradientDescentOptimizer(lr)
     return optimizer.apply_grad_processors(
         opt, [gradproc.GlobalNormClip(5)])
Example #32
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
     opt = tf.train.RMSPropOptimizer(lr, epsilon=1e-5)
     return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])
Example #33
 def _get_optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(
         opt, [gradproc.ScaleGradient(
             [('convfcweight.*', 0.1), ('conv5_.*', 5)])])
Example #34
 def _get_optimizer(self):
     lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
     opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     return optimizer.apply_grad_processors(
         opt, [gradproc.GlobalNormClip(10),
               gradproc.SummaryGradient()])
Example #35
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
     opt = tf.train.RMSPropOptimizer(lr, epsilon=1e-5)
     return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])
Example #36
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
     opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
     gradprocs = [gradproc.ScaleGradient(
         [('conv0.*', 0.1), ('group[0-2].*', 0.1)])]
     return optimizer.apply_grad_processors(opt, gradprocs)
Example #37
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=1.0, trainable=False)
     opt = tf.train.GradientDescentOptimizer(lr)
     return optimizer.apply_grad_processors(opt,
                                            [gradproc.GlobalNormClip(5)])
Example #38
 def _get_optimizer(self):
     lr = symbolic_functions.get_scalar_var('learning_rate',
                                            2e-3,
                                            summary=True)
     opt = tf.train.AdamOptimizer(lr)
     return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
Example #39
 def _get_optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
     opt = tf.train.AdamOptimizer(lr)
     return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])