# Classmethod of BaseModel (excerpt); cluster_factory, build_data, and
# hyperparams are lingvo.core modules.
@classmethod
def Params(cls):
  """Returns the model params."""
  p = super().Params()
  p.Define(
      'model', None,
      'Which python function generates the param. It includes the file name '
      'and lineno where the function is defined.')
  p.Define(
      'cluster', cluster_factory.Cluster.Params(),
      'The training cluster. Individual layers may be configured differently '
      'based on the training cluster they run under.')
  p.Define('input', None, 'Input generator Params.')
  p.Define('build_data', build_data.BuildData(), 'Build data of this binary.')
  p.Define('train', hyperparams.Params(),
           'Params to control how this model should be trained.')
  p.Define('reporting_job', 'evaler',
           'Name of the job that reports trial results.')

  tp = p.train
  tp.Define(
      'start_up_delay_steps', 200,
      'The i-th replica starts training after '
      'i*(i+1)/2*start_up_delay_steps steps.')
  tp.Define('max_steps', 4 * 10**6, 'Training max of 4M steps.')
  tp.Define('tpu_steps_per_loop', 1000,
            'The number of training steps per training loop for TPUs.')
  tp.Define(
      'tpu_device_order_mode', None,
      'A device_assignment_lib.DeviceOrderMode enum that determines whether '
      'to assign devices in a way that the order of replicas or '
      'model-parallel cores will form a ring or mesh, or let the library '
      'choose. Defaults to None (AUTO).')
  tp.Define(
      'ema_decay', 0.0,
      'If > 0, enables ExponentialMovingAverage during training with the '
      'given decay. Must be < 1. Disabled if <= 0. '
      'Must be set consistently across all tasks.')
  tp.Define(
      'ema_decay_moving_vars', None,
      'If True, includes variables from collection "moving_vars" in the EMA. '
      'Must be set consistently across all tasks.')
  tp.Define('init_from_checkpoint_rules', {},
            'See BaseTask documentation for details.')
  tp.Define('early_stop', None,
            'Early stopping based on dev-set performance.')
  tp.Define(
      'enqueue_max_steps', -1,
      'Max enqueue steps. -1 means no limit. '
      'This flag should be set for unit tests only.')
  tp.Define('save_interval_seconds', 60 * 10,
            'Generates a checkpoint roughly once every this many seconds.')
  tp.Define('save_max_to_keep', 100,
            'Maximum number of recent checkpoints to keep.')
  tp.Define('save_keep_checkpoint_every_n_hours', 0.5,
            'How often to keep a checkpoint.')
  tp.Define('summary_interval_steps', 100,
            'Generates a summary roughly once every this many steps.')
  return p
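
# A minimal usage sketch, assuming the standard lingvo flow in which Params()
# returns a hyperparams.Params tree that is copied and overridden before the
# model is instantiated. The helper name and the override values below are
# hypothetical, for illustration only.
def _ExampleTrainingOverrides(model_cls):
  """Returns example params with common p.train overrides applied."""
  p = model_cls.Params().Copy()
  p.train.max_steps = 100000           # shorten training for an experiment
  p.train.save_interval_seconds = 60   # checkpoint roughly once a minute
  p.train.ema_decay = 0.999            # enable EMA of the model variables
  return p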