def testExponentialWithoutLinearRamp(self):
  p = self._testParams()
  lr_util.SetExponentialLR(
      p.train, p.input, exp_start_epoch=0, total_epoch=10)
  schedule_layer = p.train.lr_schedule.Instantiate()
  with self.session() as sess:
    # Peak learning rate at 0.
    self.assertEqual(sess.run(schedule_layer.Value(0)), 1.)
    # Exponential ramp down within first epoch.
    self.assertLess(sess.run(schedule_layer.Value(4)), 1.)
def testExponentialWithLinearRamp(self):
  p = self._testParams()
  lr_util.SetExponentialLR(
      p.train,
      p.input,
      warmup_epoch=1,
      exp_start_epoch=2,
      total_epoch=10,
      warmup_init=0.)
  schedule_layer = p.train.lr_schedule.Instantiate()
  with self.session() as sess:
    # Linear ramp up.
    self.assertLess(sess.run(schedule_layer.Value(8)), 1.)
    # Peak learning rate.
    self.assertEqual(sess.run(schedule_layer.Value(16)), 1.)
    # Still at peak learning rate.
    self.assertEqual(sess.run(schedule_layer.Value(24)), 1.)
    # Exponential ramp down.
    self.assertLess(sess.run(schedule_layer.Value(48)), 1.)
def testExponentialWithLinearRamp(self):
  p = self._testParams()
  lr_util.SetExponentialLR(
      p.train,
      p.input,
      warmup_epoch=1,
      exp_start_epoch=2,
      total_epoch=10,
      warmup_init=0.)
  schedule_layer = p.train.lr_schedule.Instantiate()
  with self.session():
    # Linear ramp up.
    with py_utils.GlobalStepContext(8):
      self.assertLess(self.evaluate(schedule_layer.Value()), 1.)
    # Peak learning rate.
    with py_utils.GlobalStepContext(16):
      self.assertEqual(self.evaluate(schedule_layer.Value()), 1.)
    # Still at peak learning rate.
    with py_utils.GlobalStepContext(24):
      self.assertEqual(self.evaluate(schedule_layer.Value()), 1.)
    # Exponential ramp down.
    with py_utils.GlobalStepContext(48):
      self.assertLess(self.evaluate(schedule_layer.Value()), 1.)
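# (The two warmup-test variants above differ only in how the step is
# supplied: passed to Value() directly vs. read from
# py_utils.GlobalStepContext.)
#
# For reference, a minimal standalone sketch of the piecewise schedule these
# tests exercise: linear ramp up, a hold at the peak, then exponential decay.
# This is an illustrative reimplementation, not lr_util.SetExponentialLR
# itself; it assumes 16 steps per epoch (inferred from the step values the
# tests check) and an assumed final decay scale.
import math


def _sketch_exponential_lr(step,
                           steps_per_epoch=16,
                           warmup_epoch=1,
                           exp_start_epoch=2,
                           total_epoch=10,
                           warmup_init=0.,
                           final_scale=1e-3):
  """Relative LR multiplier at a global step (hypothetical sketch)."""
  warmup_steps = warmup_epoch * steps_per_epoch
  decay_start = exp_start_epoch * steps_per_epoch
  decay_end = total_epoch * steps_per_epoch
  if step < warmup_steps:
    # Linear ramp from warmup_init up to the peak value of 1.
    return warmup_init + (1. - warmup_init) * step / warmup_steps
  if step < decay_start:
    # Hold at the peak between the end of warmup and the start of decay.
    return 1.
  # Exponential decay from 1 toward final_scale at decay_end.
  frac = (step - decay_start) / (decay_end - decay_start)
  return math.exp(frac * math.log(final_scale))


assert _sketch_exponential_lr(8) < 1.    # still ramping up
assert _sketch_exponential_lr(16) == 1.  # peak
assert _sketch_exponential_lr(24) == 1.  # still at peak
assert _sketch_exponential_lr(48) < 1.   # decaying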
def Task(self):
  num_classes = len(
      kitti_input_generator.KITTILabelExtractor.KITTI_CLASS_NAMES)
  p = starnet.ModelV2.Params(
      num_classes,
      num_anchor_bboxes_offsets=self.NUM_ANCHOR_BBOX_OFFSETS,
      num_anchor_bboxes_rotations=self.NUM_ANCHOR_BBOX_ROTATIONS,
      num_anchor_bboxes_dimensions=self.NUM_ANCHOR_BBOX_DIMENSIONS)
  p.name = 'sparse_detector'

  tp = p.train
  tp.optimizer = optimizer.Adam.Params()
  tp.clip_gradient_norm_to_value = 5

  ep = p.eval
  # Evaluate the whole dataset.
  ep.samples_per_summary = 0

  # To be tuned.
  p.train.l2_regularizer_weight = 1e-4

  # Adapted from V1 tuning.
  tp.ema_decay = 0.99
  # TODO(b/148537111): consider setting this to True.
  tp.ema_decay_moving_vars = False
  tp.learning_rate = 0.001
  lr_util.SetExponentialLR(
      train_p=tp,
      train_input_p=self.Train(),
      exp_start_epoch=150,
      total_epoch=650)

  p.dimension_loss_weight = .3
  p.location_loss_weight = 3.
  p.loss_weight_classification = 1.
  p.loss_weight_localization = 3.
  p.rotation_loss_weight = 0.3
  return p
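# SetExponentialLR takes epoch-based arguments (exp_start_epoch=150,
# total_epoch=650 above), while lingvo schedules operate on the global step,
# so the helper presumably derives a steps-per-epoch factor from the training
# input params it is given. A rough sketch of that conversion; the function
# name and the sample/batch numbers below are illustrative assumptions, not
# values taken from this file.
def _epochs_to_steps(epochs, num_train_samples, global_batch_size):
  steps_per_epoch = num_train_samples // global_batch_size
  return epochs * steps_per_epoch


# E.g. 1000 samples at a global batch size of 10 gives 100 steps per epoch,
# so exp_start_epoch=150 would land at global step 15000.
assert _epochs_to_steps(150, 1000, 10) == 15000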
def Task(self):
  metadata = waymo_metadata.WaymoMetadata()
  num_classes = len(metadata.ClassNames())
  p = starnet.ModelV2.Params(
      num_classes,
      num_anchor_bboxes_offsets=self.NUM_ANCHOR_BBOX_OFFSETS,
      num_anchor_bboxes_rotations=self.NUM_ANCHOR_BBOX_ROTATIONS,
      num_anchor_bboxes_dimensions=self.NUM_ANCHOR_BBOX_DIMENSIONS,
      num_laser_features=3)

  # Update the Point Cloud Featurizer architecture.
  starnet_builder = starnet.Builder()
  starnet_builder.linear_params_init = (
      py_utils.WeightInit.KaimingUniformFanInRelu())
  gin_layers = [[
      self.GIN_HIDDEN_DIMS * 2, self.GIN_HIDDEN_DIMS * 4, self.GIN_HIDDEN_DIMS
  ]] * self.NUM_GIN_LAYERS  # pyformat: disable

  p.cell_featurizer = starnet_builder.GINFeaturizerV2(
      'feat',
      num_laser_features=3,
      fc_dims=self.GIN_HIDDEN_DIMS,
      mlp_dims=gin_layers,
      fc_use_bn=False)
  p.cell_feature_dims = self.GIN_HIDDEN_DIMS * (self.NUM_GIN_LAYERS + 1)

  p.output_decoder = waymo_decoder.WaymoOpenDatasetDecoder.Params()
  p.max_nms_boxes = 512
  p.use_oriented_per_class_nms = True

  # Note: Sub-classes need to set nms_iou_threshold and nms_score_threshold
  # appropriately.
  p.nms_iou_threshold = [0.0] * num_classes

  # TODO(jngiam): 1.1 for untrained classes is needed to avoid an issue
  # with boxutils error.
  p.nms_score_threshold = [1.1] * num_classes

  p.name = 'starnet'

  tp = p.train
  tp.optimizer = optimizer.Adam.Params()
  tp.clip_gradient_norm_to_value = 5

  ep = p.eval
  # Train set uses a smaller decoding set, so we can
  # safely eval over the entire input.
  ep.samples_per_summary = 0

  # To be tuned.
  p.train.l2_regularizer_weight = 1e-8

  cluster = cluster_factory.Current()
  train_cluster_p = cluster.params.Copy()
  train_cluster_p.job = 'trainer_client'
  train_cluster_p.mode = 'sync'

  # When running a decoding-only job, there are no trainer workers, so we set
  # worker replicas to 1 as a dummy value.
  if train_cluster_p.worker.replicas <= 0:
    train_cluster_p.worker.replicas = 1

  # Set learning rate and schedule.
  with cluster_factory.Cluster(train_cluster_p):
    train_input_p = self.Train()

  # Adapted from V1 tuning.
  tp.ema_decay = 0.99
  # TODO(b/148537111): consider setting this to True.
  tp.ema_decay_moving_vars = False
  tp.learning_rate = 0.001
  lr_util.SetExponentialLR(
      train_p=tp,
      train_input_p=train_input_p,
      exp_start_epoch=5,
      total_epoch=75)

  p.dimension_loss_weight = .3
  p.location_loss_weight = 3.
  p.loss_weight_classification = 1.
  p.loss_weight_localization = 3.
  p.rotation_loss_weight = 0.3
  return p
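# The base Task() above effectively disables detections for every class: a
# score threshold of 1.1 can never be exceeded, per the TODO about untrained
# classes. As the note in Task() says, sub-classes are expected to re-enable
# NMS per class. A hypothetical override; the class names, base class, class
# index, and threshold values are all illustrative assumptions.
class StarNetVehicleExample(StarNetBase):  # StarNetBase is hypothetical

  VEHICLE_CLASS_ID = 1  # assumed index into metadata.ClassNames()

  def Task(self):
    p = super().Task()
    # Enable oriented per-class NMS for the vehicle class only; other
    # classes keep the "disabled" thresholds set by the base config.
    p.nms_iou_threshold[self.VEHICLE_CLASS_ID] = 0.5
    p.nms_score_threshold[self.VEHICLE_CLASS_ID] = 0.3
    return p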