def get_config():
    basename = os.path.basename(__file__)
    logger.set_logger_dir(
        os.path.join('train_log', basename[:basename.rfind('.')]))

    # prepare dataset
    dataset_train = tp.BatchData(tp.dataset.Mnist('train'), 128)
    dataset_test = tp.BatchData(tp.dataset.Mnist('test'), 256, remainder=True)
    step_per_epoch = dataset_train.size()

    # prepare session
    sess_config = tp.get_default_sess_config()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    lr = tf.train.exponential_decay(
        learning_rate=1e-3,
        global_step=tp.get_global_step_var(),
        decay_steps=dataset_train.size() * 10,
        decay_rate=0.3, staircase=True, name='learning_rate')
    tf.summary.scalar('learning_rate', lr)  # tf.scalar_summary was removed in TF 1.0

    return tp.TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_test,
                            [ScalarStats('cost'), ClassificationError()])
        ]),
        session_config=sess_config,
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=100,
    )
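# Hedged side note: with decay_rate=0.3 and staircase=True, the schedule above
# reduces to lr = 1e-3 * 0.3 ** (step // decay_steps). A minimal pure-Python
# sketch (no TF session needed); 4680 stands in for dataset_train.size() * 10,
# assuming MNIST's 60000 samples yield 468 full batches of 128.
def decayed_lr_sketch(step, base_lr=1e-3, decay_rate=0.3, decay_steps=4680):
    return base_lr * decay_rate ** (step // decay_steps)

# decayed_lr_sketch(0) == 1e-3; decayed_lr_sketch(4680) == 3e-4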
def optimizer(self) -> Any:
    lr = tf.train.exponential_decay(
        learning_rate=self.hparams["base_learning_rate"],
        global_step=tp.get_global_step_var(),
        decay_steps=self.hparams["decay_steps"],
        decay_rate=self.hparams["decay_rate"],
        staircase=True,
        name="learning_rate",
    )
    tf.summary.scalar("lr", lr)
    return tf.train.AdamOptimizer(lr)
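# Hypothetical hparams dict illustrating the three keys optimizer() reads;
# the real values live wherever self.hparams is populated.
example_hparams = {
    "base_learning_rate": 1e-3,
    "decay_steps": 4680,
    "decay_rate": 0.3,
}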
def _setup(self):
    super(GANTrainer, self)._setup()
    with TowerContext(''):
        actual_inputs = self._get_input_tensors()
        self.model.build_graph(actual_inputs)
        # separate minimization ops for generator and discriminator,
        # each restricted to its own variable list
        self.g_min = self.config.optimizer.minimize(
            self.model.g_loss, var_list=self.model.g_vars, name='g_op')
        self.d_min = self.config.optimizer.minimize(
            self.model.d_loss, var_list=self.model.d_vars, name='d_op')
        self.gs_incr = tf.assign_add(
            get_global_step_var(), 1, name='global_step_incr')
        self.summary_op = summary_moving_average()
        # running d_min also flushes summaries and advances the global step
        self.d_min = tf.group(self.d_min, self.summary_op, self.gs_incr)
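# Hedged sketch of how g_min/d_min might be driven once a session exists.
# The real tensorpack trainer issues these inside its own step logic; this
# is only an illustration of the alternating update, where a single run of
# d_min also updates summaries and the global step (it was grouped above).
def run_gan_step_sketch(sess, trainer):
    sess.run(trainer.d_min)  # discriminator update + bookkeeping
    sess.run(trainer.g_min)  # generator update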
def build_graph(self, image, label):
    image = image / 128.0
    assert tf.test.is_gpu_available()

    with tf.variable_scope(self._name):
        x = ScaleNormConv2D(image, 16, 3, 1, name="conv_input")
        # shape = [batchsize, 32, 32, 16]
        x = CifarResNet.build_group(x, self._n, 16, stride=1,
                                    mult_decay=self._mult_decay, name="g1")
        # shape = [batchsize, 16, 16, 32]
        x = CifarResNet.build_group(x, self._n, 32, stride=2,
                                    mult_decay=self._mult_decay, name="g2")
        # shape = [batchsize, 8, 8, 64]
        x = CifarResNet.build_group(x, self._n, 64, stride=2,
                                    mult_decay=self._mult_decay, name="g3")
        # normalise the final output by the accumulated multiplier
        # x = BatchNorm("bn_last", x, epsilon=EPSILON, center=False, scale=True)
        x = ActBias(x, name="act_top")
        # x = GlobalAvgPooling("gap", x)
        logits = FullyConnected("linear", x, self._n_classes)
        prob = tf.nn.softmax(logits, name="prob")

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name="cross_entropy_loss")

    wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                    tf.float32, name="wrong_vector")
    add_moving_summary(tf.reduce_mean(wrong, name="train_error"))

    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                      480000, 0.2, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss),
                          name='wd_cost')
    add_moving_summary(cost, wd_cost)

    return tf.add_n([cost, wd_cost], name="cost")
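# Quick numeric sketch of the decayed weight-decay coefficient used above
# (staircase form): wd_w(step) = 2e-4 * 0.2 ** (step // 480000).
def wd_coefficient_sketch(step):
    return 2e-4 * 0.2 ** (step // 480000)

# wd_coefficient_sketch(0) == 2e-4; wd_coefficient_sketch(480000) == 4e-5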
def build_graph(self, image, label):
    scale_image = 1. / 128.0
    image = image * scale_image
    image_moment2 = CIFAR_TRAIN_PIXEL_MOMENT2 * scale_image * scale_image
    assert tf.test.is_gpu_available()

    with tf.variable_scope(self._name):
        x = NormConv2DScale(image, 16, 3, 1, center=self._center,
                            input_moment2=image_moment2, name="conv_input")
        add_activation_summary(x, types=["mean", "rms", "histogram"])
        # shape = [batchsize, 32, 32, 16]
        x = CifarResNet.build_group(x, self._n, 16, stride=1,
                                    center=self._center,
                                    theta_init=self._theta_init,
                                    theta_lr_mult=self._theta_lr_mult,
                                    name="g1")
        add_activation_summary(x, types=["mean", "rms", "histogram"])
        # shape = [batchsize, 16, 16, 32]
        x = CifarResNet.build_group(x, self._n, 32, stride=2,
                                    center=self._center,
                                    theta_init=self._theta_init,
                                    theta_lr_mult=self._theta_lr_mult,
                                    name="g2")
        add_activation_summary(x, types=["mean", "rms", "histogram"])
        # shape = [batchsize, 8, 8, 64]
        x = CifarResNet.build_group(x, self._n, 64, stride=2,
                                    center=self._center,
                                    theta_init=self._theta_init,
                                    theta_lr_mult=self._theta_lr_mult,
                                    name="g3")
        add_activation_summary(x, types=["mean", "rms", "histogram"])
        x = ActBias(x, name="act_top")
        # x = GlobalAvgPooling("gap", x)
        logits = FullyConnected("linear", x, self._n_classes)
        prob = tf.nn.softmax(logits, name="prob")

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name="cross_entropy_loss")

    wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                    tf.float32, name="wrong_vector")
    add_moving_summary(tf.reduce_mean(wrong, name="train_error"))

    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                      480000, 0.2, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss),
                          name='wd_cost')
    add_moving_summary(cost, wd_cost)
    add_param_summary(('.*/theta', ['histogram']))
    add_param_summary(('.*/ma_mu', ['histogram']))

    return tf.add_n([cost, wd_cost], name="cost")
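# Why image_moment2 is scaled by scale_image**2: for y = s * x,
# E[y^2] = s^2 * E[x^2]. Tiny numpy check with made-up pixel-like data:
import numpy as np

_x = np.random.randn(1000) * 64.0 + 120.0
_s = 1. / 128.0
assert np.isclose(np.mean((_s * _x) ** 2), _s * _s * np.mean(_x ** 2))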