def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('STN.*', 0.1)),
              gradproc.SummaryGradient()])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [gradproc.GlobalNormClip(10),
              gradproc.SummaryGradient()])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(opt, [
        gradproc.ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])
    ])

def optimizer(self):
    opt = get_optimizer(self.optimizer_config)
    if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['stop_grad']):
        self.add_stop_grad()
        opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.stop_grad)])
    if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['centralized']):
        self.add_centralizing_update()
        opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
    if self.quantizer_config['name'] == 'cent':
        self.add_centralizing_update()
        opt = optimizer.PostProcessOptimizer(opt, self.centralizing)
    if self.quantizer_config['name'] == 'cluster' and eval(self.load_config['clustering']):
        opt = optimizer.apply_grad_processors(opt, [gradproc.MapGradient(self.clustering)])
    if self.quantizer_config['name'] == 'linear' and eval(self.quantizer_config['W_opts']['pruning']):
        self.add_masking_update()
        opt = optimizer.PostProcessOptimizer(opt, self.masking)
    return opt

def optimizer(self): lr = tf.get_variable("learning_rate", initializer=0.0002, trainable=False) tf.summary.scalar("learning_rate", lr) opt = tf.train.AdamOptimizer(lr, beta1=0.5, beta2=0.999) return optimizer.apply_grad_processors( opt, [gradproc.SummaryGradient(), gradproc.CheckGradient()])
def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(opt, [
        gradproc.ScaleGradient(('STN.*', 0.1)),
        gradproc.SummaryGradient()
    ])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
    # opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(
        opt, [
            # gradproc.GlobalNormClip(2.0),
            gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
            gradproc.SummaryGradient()])

def _get_optimizer(self):
    conf = Config()
    lr = tf.get_variable('learning_rate', initializer=conf.learning_rate, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    tf.summary.scalar('learning_rate', lr)
    return optimizer.apply_grad_processors(
        opt, [gradproc.GlobalNormClip(conf.max_grad_norm)])

def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=2e-4, dtype=tf.float32, trainable=False)
    opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
    # generator learns 5 times faster
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('gen/.*', 5))])

def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=True)
    tf.summary.scalar('lr', lr)
    opt = tf.train.AdamOptimizer(lr)
    if self.stage == 1:
        return opt
    else:
        # Only apply Global Norm Clipping to gradients of ConvLSTM
        return apply_grad_processors(opt, [FilteredGlobalNormClip(3, 'ConvLSTM')])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    # gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
    #              SummaryGradient()]
    gradprocs = [MapGradient(lambda grad: grad),
                 SummaryGradient()]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt

def optimizer(self):
    lr = tf.get_variable('a3c_learning_rate', initializer=1e-4, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    gradprocs = [
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.3))
    ]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt

def optimizer(self):
    lr = tf.train.exponential_decay(self.base_lr,
                                    global_step=get_global_step_var(),
                                    decay_steps=self.decay_steps,
                                    decay_rate=self.decay_rate,
                                    name='learning-rate')
    opt = tf.train.RMSPropOptimizer(learning_rate=lr)
    tf.summary.scalar('lr', lr)
    return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])

def optimizer(self): lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False) tf.summary.scalar("learning_rate", lr) opt = tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0.95, epsilon=1e-2) return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])
def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
    # This will also put the summary in tensorboard, stat.json and print in terminal,
    # but this time without moving average
    tf.summary.scalar('lr', lr)
    # opt = tf.train.MomentumOptimizer(lr, 0.9)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(
        opt, [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
              gradproc.SummaryGradient()])

def _get_optimizer(self):
    gradprocs = [
        FilterGradientVariables('.*net2.*', verbose=False),
        gradproc.MapGradient(
            lambda grad: tf.clip_by_value(grad, hp.train2.clip_value_min, hp.train2.clip_value_max)),
        gradproc.GlobalNormClip(hp.train2.clip_norm),
        # gradproc.PrintGradient(),
        # gradproc.CheckGradient(),
    ]
    lr = tf.get_variable('learning_rate', initializer=hp.train2.lr, trainable=False)
    opt = tf.train.AdamOptimizer(learning_rate=lr)
    return optimizer.apply_grad_processors(opt, gradprocs)

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=args.base_lr, trainable=False)
    opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
    add_moving_summary(lr)
    if args.load:
        gradprocs = [
            gradproc.ScaleGradient([('conv.*', 0.1), ('fc.*', 0.1)])
        ]
        return optimizer.apply_grad_processors(opt, gradprocs)
    else:
        return opt

def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)
    factor = get_batch_factor()
    if factor != 1:
        lr = lr / float(factor)
        opt = tf.train.MomentumOptimizer(lr, 0.9)
        opt = optimizer.AccumGradOptimizer(opt, factor)
    else:
        opt = tf.train.MomentumOptimizer(lr, 0.9)
    return opt
    # unreachable: the bias-gradient scaling below is never applied
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('.*/b', 2))])

def _get_opt(name, init_lr):
    lr = symbf.get_scalar_var('learning_rate/' + name, init_lr, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    gradprocs = [
        # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1), regex='^actor/.*'),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05), regex='^critic/.*'),
        # GlobalNormClip(40.),
        SummaryGradient(),
    ]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt

def _get_optimizer(self):
    gradprocs = [
        tensorpack_extension.FilterGradientVariables('.*net2.*', verbose=False),
        gradproc.MapGradient(
            lambda grad: tf.clip_by_value(grad, hp.train2.clip_value_min, hp.train2.clip_value_max)),
        gradproc.GlobalNormClip(hp.train2.clip_norm),
        # gradproc.PrintGradient(),
        # gradproc.CheckGradient(),
    ]
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # self.lr = self.learning_rate_decay(global_step, hp.train2.lr)
    # lr = learning_rate_decay(initial_lr=hp.train2.lr, global_step)
    lr = tf.get_variable('learning_rate', initializer=hp.train2.lr, trainable=False)
    opt = tf.train.AdamOptimizer(learning_rate=lr)
    return optimizer.apply_grad_processors(opt, gradprocs)

def _get_optimizer(self):
    lr = get_scalar_var('learning_rate', cfg.base_lr, summary=True)
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=cfg.momentum)
    gradprocs = [
        gradproc.ScaleGradient([('conv.*/W', 1), ('conv.*/b', cfg.bias_lr_mult),
                                ('bottleneck.*/W', 1), ('bottleneck.*/b', cfg.bias_lr_mult),
                                ('stage_1.*/W', 1), ('stage_1.*/b', cfg.bias_lr_mult),
                                ('stage_[2-6].*/W', cfg.lr_mult),
                                ('stage_[2-6].*/b', cfg.lr_mult * cfg.bias_lr_mult)])
    ]
    return optimizer.apply_grad_processors(opt, gradprocs)

def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=0.003, trainable=False)
    tf.summary.scalar('learning_rate', lr)
    factor = get_batch_factor()
    if factor != 1:
        lr = lr / float(factor)
        opt = tf.train.MomentumOptimizer(lr, 0.9)
        opt = optimizer.AccumGradOptimizer(opt, factor)
    else:
        opt = tf.train.MomentumOptimizer(lr, 0.9)
    return opt
    # unreachable: the bias-gradient scaling below is never applied
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('.*/b', 2))])

def _get_opt(name, init_lr):
    lr = symbf.get_scalar_var('learning_rate/' + name, init_lr, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    gradprocs = [
        # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1), regex='^actor/.*'),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05), regex='^critic/.*'),
        # GlobalNormClip(40.),
        SummaryGradient(),
    ]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt

def _get_optimizer(self, name):
    from tensorpack.tfutils import optimizer
    from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip, MapGradient
    import tensorpack.tfutils.symbolic_functions as symbf

    init_lr = INIT_LEARNING_RATE_A if name == 'actor' else INIT_LEARNING_RATE_C
    lr = symbf.get_scalar_var('learning_rate/' + name, init_lr, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    gradprocs = [
        # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1), regex='^actor/.*'),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05), regex='^critic/.*'),
        # GlobalNormClip(40.),
        SummaryGradient(),
    ]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=2e-4, dtype=tf.float32, trainable=False)
    opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
    # generator learns 5 times faster
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('gen/.*', 5))])

def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [gradproc.GlobalNormClip(10),
              gradproc.SummaryGradient()])

def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
    opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
    # generator learns 5 times faster
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('gen/.*', 5))])

def optimizer(self): lr = tf.get_variable("learning_rate", initializer=self.lr, trainable=False) opt = tf.train.AdamOptimizer(lr) return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
    opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
    gradprocs = [gradproc.ScaleGradient(
        [('conv0.*', 0.1), ('group[0-2].*', 0.1)])]
    return optimizer.apply_grad_processors(opt, gradprocs)

def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)
    opt = tf.train.MomentumOptimizer(lr, 0.9)
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('.*/b', 2))])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1.0, trainable=False)
    opt = tf.train.GradientDescentOptimizer(lr)
    return optimizer.apply_grad_processors(
        opt, [gradproc.GlobalNormClip(5)])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
    opt = tf.train.RMSPropOptimizer(lr, epsilon=1e-5)
    return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])

def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(
            [('convfcweight.*', 0.1), ('conv5_.*', 5)])])

def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [gradproc.GlobalNormClip(10),
              gradproc.SummaryGradient()])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
    opt = tf.train.RMSPropOptimizer(lr, epsilon=1e-5)
    return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
    opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
    gradprocs = [gradproc.ScaleGradient(
        [('conv0.*', 0.1), ('group[0-2].*', 0.1)])]
    return optimizer.apply_grad_processors(opt, gradprocs)

def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1.0, trainable=False)
    opt = tf.train.GradientDescentOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [gradproc.GlobalNormClip(5)])

def _get_optimizer(self):
    lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])

def _get_optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])