def qa(x):
    name = x.op.name
    ema_name = name + '_ema'
    c = tf.get_variable(ema_name, initializer=0.0, dtype=tf.float32)
    max_a = tf.reduce_max(x)  # tf.max does not exist; reduce_max gives the activation maximum
    temp_c = max_a
    # tf.cond requires callable branches; seed the EMA with the current maximum on the
    # first step, then update it with decay 0.99 afterwards.
    new_c = tf.cond(tf.equal(tf.get_global_step(), 0),
                    lambda: temp_c,
                    lambda: c * 0.99 + temp_c * 0.01,
                    name=ema_name + '_new')
    # op = tf.assign(c, new_c, use_locking=False).op
    tf.add_to_collection('new_cs', new_c)
    n = 2 ** bitA - 1  # bitA (activation bit width) must be defined in the enclosing scope
    lower = new_c * 0.05
    upper = new_c * 0.95
    # tf.conf is not a TensorFlow op; tf.where selects element-wise between the two branches
    x_temp = tf.where(x < lower, tf.zeros_like(x),
                      tf.clip_by_value(x, lower, upper * 0.9999))
    x_temp = (n / 0.9 * new_c) * x_temp + (0.5 - (0.5 * n / 9))
    x_temp = tf.round(x_temp)
    x_temp = x_temp / n * upper
    # (output, grad_fn) matches the tf.custom_gradient convention; the identity
    # gradient makes the rounding a straight-through estimator.
    return x_temp, lambda dy: dy
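# --- A minimal usage sketch (not from the original source) ---
# qa() returns an (output, grad_fn) pair, which is exactly what tf.custom_gradient
# expects, so the identity grad_fn acts as a straight-through estimator for the
# rounding. bitA and the dense_quantized helper below are assumptions, and a
# global step must exist before qa() is called because the EMA update branches on it.
import tensorflow as tf

bitA = 8  # hypothetical activation bit width used by qa()

@tf.custom_gradient
def quantize_activation(x):
    return qa(x)

def dense_quantized(x, units):
    # Hypothetical helper: a dense layer whose ReLU output is fed through the quantizer.
    h = tf.layers.dense(x, units, activation=tf.nn.relu)
    return quantize_activation(h)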
def ChooseOptimizer(params, regularized_loss, trainable_var):
    if params['optalg'] == 'adam':
        optimizer = tf.train.AdamOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    elif params['optalg'] == 'adadelta':
        if params['decay_rate'] > 0:
            optimizer = tf.train.AdadeltaOptimizer(params['lr'], params['decay_rate']).minimize(regularized_loss, var_list=trainable_var)
        else:  # defaults 0.001, 0.95
            optimizer = tf.train.AdadeltaOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    elif params['optalg'] == 'adagrad':
        # also has initial_accumulator_value parameter
        optimizer = tf.train.AdagradOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    elif params['optalg'] == 'adagradDA':
        # Be careful when using AdagradDA for deep networks as it will require careful
        # initialization of the gradient accumulators for it to train.
        optimizer = tf.train.AdagradDAOptimizer(params['lr'], tf.get_global_step()).minimize(regularized_loss, var_list=trainable_var)
    # elif params['optalg'] == 'momentum':
    #     # need to pass in momentum
    #     optimizer = tf.train.MomentumOptimizer(params['lr']).minimize(regularized_loss)
    elif params['optalg'] == 'ftrl':
        # lots of hyperparameters: learning_rate_power, initial_accumulator_value,
        # l1_regularization_strength, l2_regularization_strength
        optimizer = tf.train.FtrlOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    elif params['optalg'] == 'proximalGD':
        # can have built-in reg.
        optimizer = tf.train.ProximalGradientDescentOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    elif params['optalg'] == 'proximalAdagrad':
        # initial_accumulator_value, reg.
        optimizer = tf.train.ProximalAdagradOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    elif params['optalg'] == 'RMS':
        # momentum, epsilon, centered (False/True)
        if params['decay_rate'] > 0:
            optimizer = tf.train.RMSPropOptimizer(params['lr'], params['decay_rate']).minimize(regularized_loss, var_list=trainable_var)
        else:  # default decay_rate 0.9
            optimizer = tf.train.RMSPropOptimizer(params['lr']).minimize(regularized_loss, var_list=trainable_var)
    else:
        # falling through silently would leave `optimizer` unbound, so raise instead
        raise ValueError("chose invalid optalg %s" % params['optalg'])
    return optimizer
def _setup_global_step(self, global_step):
    graph_global_step = global_step
    if graph_global_step is None:
        graph_global_step = tf.get_global_step()
    logging.info('graph_global_step: %s', graph_global_step)
    return tf.cast(graph_global_step, tf.int32)
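# --- A minimal usage sketch (not from the original source) ---
# _setup_global_step is a method on some model/trainer class; `model` below is a
# hypothetical instance of that class. Passing None exercises the tf.get_global_step()
# fallback, so a global step must already exist in the graph.
import tensorflow as tf

tf.train.get_or_create_global_step()                            # ensure the fallback can find a step
step_int32 = model._setup_global_step(None)                     # int32 tensor via the fallback path
explicit = model._setup_global_step(tf.constant(7, tf.int64))   # or pass a step tensor explicitly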
estimator = DNNRegressor(
    feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
    hidden_units=[1024, 512, 256],
    optimizer=tf.train.ProximalAdagradOptimizer(
        learning_rate=0.1,
        l1_regularization_strength=0.001
    ))

# Or estimator using an optimizer with a learning rate decay.
estimator = DNNRegressor(
    feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
    hidden_units=[1024, 512, 256],
    optimizer=lambda: tf.train.AdamOptimizer(
        learning_rate=tf.train.exponential_decay(
            learning_rate=0.1,
            global_step=tf.get_global_step(),
            decay_steps=10000,
            decay_rate=0.96)))

# Or estimator with warm-starting from a previous checkpoint.
estimator = DNNRegressor(
    feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
    hidden_units=[1024, 512, 256],
    warm_start_from="/path/to/checkpoint/dir")

# Input builders
def input_fn_train():
    # returns x, y
    pass

estimator.train(input_fn=input_fn_train, steps=100)

def input_fn_eval():
    # returns x, y
    pass
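# A plausible continuation of the docstring example above (not part of the original
# snippet): evaluation and prediction follow the same input_fn pattern as training.
metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)

def input_fn_predict():
    # returns x, None
    pass

predictions = estimator.predict(input_fn=input_fn_predict)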
def choose_optimizer(params, regularized_loss, trainable_var):
    """Choose which optimizer to use for the network training.

    Arguments:
        params -- dictionary of parameters for experiment
        regularized_loss -- loss, including regularization
        trainable_var -- list of trainable TensorFlow variables

    Returns:
        optimizer -- optimizer from TensorFlow Class optimizer

    Side effects:
        None

    Raises ValueError if params['opt_alg'] is not 'adam', 'adadelta', 'adagrad',
    'adagradDA', 'ftrl', 'proximalGD', 'proximalAdagrad', or 'RMS'
    """
    if params['opt_alg'] == 'adam':
        optimizer = tf.train.AdamOptimizer(params['learning_rate']).minimize(
            regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'adadelta':
        if params['decay_rate'] > 0:
            optimizer = tf.train.AdadeltaOptimizer(
                params['learning_rate'], params['decay_rate']).minimize(
                    regularized_loss, var_list=trainable_var)
        else:
            # defaults 0.001, 0.95
            optimizer = tf.train.AdadeltaOptimizer(
                params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'adagrad':
        # also has initial_accumulator_value parameter
        optimizer = tf.train.AdagradOptimizer(
            params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'adagradDA':
        # Be careful when using AdagradDA for deep networks as it will require careful
        # initialization of the gradient accumulators for it to train.
        optimizer = tf.train.AdagradDAOptimizer(params['learning_rate'], tf.get_global_step()).minimize(
            regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'ftrl':
        # lots of hyperparameters: learning_rate_power, initial_accumulator_value,
        # l1_regularization_strength, l2_regularization_strength
        optimizer = tf.train.FtrlOptimizer(params['learning_rate']).minimize(
            regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'proximalGD':
        # can have built-in reg.
        optimizer = tf.train.ProximalGradientDescentOptimizer(
            params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'proximalAdagrad':
        # initial_accumulator_value, reg.
        optimizer = tf.train.ProximalAdagradOptimizer(
            params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
    elif params['opt_alg'] == 'RMS':
        # momentum, epsilon, centered (False/True)
        if params['decay_rate'] > 0:
            optimizer = tf.train.RMSPropOptimizer(
                params['learning_rate'], params['decay_rate']).minimize(
                    regularized_loss, var_list=trainable_var)
        else:
            # default decay_rate 0.9
            optimizer = tf.train.RMSPropOptimizer(
                params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
    else:
        raise ValueError("chose invalid opt_alg %s in params dict" % params['opt_alg'])
    return optimizer
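# --- A minimal usage sketch (not from the original source) ---
# Note that choose_optimizer returns the minimize() op rather than the Optimizer
# object, so running it performs one training step. The params values, num_steps,
# and the graph behind regularized_loss are hypothetical / assumed to exist.
import tensorflow as tf

params = {'opt_alg': 'RMS', 'learning_rate': 1e-3, 'decay_rate': 0.9}
train_op = choose_optimizer(params, regularized_loss, tf.trainable_variables())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_steps):   # any feeds required by regularized_loss go in sess.run
        sess.run(train_op)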
def get_optimizer():
    lr = tf.train.piecewise_constant(tf.get_global_step(), FLAGS.lr_boundaries, FLAGS.lr_values)
    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=FLAGS.momentum, epsilon=FLAGS.adam_epsilon)
    return opt
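# --- A minimal usage sketch (not from the original source) ---
# FLAGS here is a hypothetical stand-in for the real flag object; the values are
# illustrative only. tf.train.piecewise_constant expects len(lr_values) ==
# len(lr_boundaries) + 1, and the global step must exist before get_optimizer() runs.
import tensorflow as tf

global_step = tf.train.get_or_create_global_step()

class _Flags(object):                  # hypothetical stand-in for FLAGS
    lr_boundaries = [10000, 20000]     # steps at which the learning rate drops
    lr_values = [1e-3, 1e-4, 1e-5]     # one more value than boundaries
    momentum = 0.9
    adam_epsilon = 1e-8

FLAGS = _Flags()

opt = get_optimizer()
train_op = opt.minimize(loss, global_step=global_step)  # `loss` assumed defined elsewhere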