def __init__(self):
    BaseReward.__init__(self)
    # required if you want to design a custom reward taking into account the
    # alarm feature
    self.has_alarm_component = True
    self.is_alarm_used = False  # required to update it in __call__ !!
    self.total_time_steps = dt_float(0.0)
    self.reward_min = dt_float(-1.)
    self.reward_max = dt_float(1.)
    self.reward_no_game_over = dt_float(0.)
    self.window_size = None
    self.best_time = None
    self.mult_for_right_zone = 2

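The flags set above (`has_alarm_component`, `is_alarm_used`) are what a custom alarm-aware reward has to maintain itself. Below is a minimal sketch of such a subclass, assuming grid2op's `BaseReward` call signature `__call__(action, env, has_error, is_done, is_illegal, is_ambiguous)`; the `_alarm_was_helpful` helper is hypothetical and only marks where the real zone/timing logic would go.

from grid2op.Reward import BaseReward
from grid2op.dtypes import dt_float


class MyAlarmAwareReward(BaseReward):
    """Sketch of a custom reward that accounts for the alarm feature."""

    def __init__(self):
        BaseReward.__init__(self)
        self.has_alarm_component = True
        self.is_alarm_used = False  # must be updated in __call__
        self.reward_min = dt_float(-1.0)
        self.reward_max = dt_float(1.0)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if not is_done:
            # nothing to score until the episode ends (or fails)
            return dt_float(0.0)
        if self._alarm_was_helpful(env):  # hypothetical helper
            self.is_alarm_used = True
            return self.reward_max
        return self.reward_min

    def _alarm_was_helpful(self, env):
        # placeholder: real logic would check when/where alarms were raised
        return False
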
def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = None
    self.reward_max = None
    self.worst_cost = None

def __init__(self, *args, **kwargs):
    BaseReward.__init__(self, *args, **kwargs)
    warnings.warn(
        "Reward class has been renamed \"BaseReward\". "
        "This class Reward will be removed in future versions.",
        category=PendingDeprecationWarning)

def __init__(self, alpha_redisph=1.0):
    BaseReward.__init__(self)
    self.reward_min = dt_float(1.0)  # careful here between min and max...
    self.reward_max = dt_float(300.0 * 70.0)
    self.alpha_redisph = dt_float(alpha_redisph)

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = dt_float(0.0)
    self.reward_max = dt_float(1.0)
    self.penalty_max_at_n_lines = dt_float(2.0)

def __init__(self, per_timestep=1):
    BaseReward.__init__(self)
    self.per_timestep = dt_float(per_timestep)
    self.reward_min = dt_float(0.0)

def __init__(self):
    BaseReward.__init__(self)

def __init__(self, per_timestep=1):
    BaseReward.__init__(self)
    self.per_timestep = per_timestep
    self.total_reward = 0
    self.reward_min = 0
    self.reward_max = per_timestep

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = None
    self.reward_max = None
    self.max_regret = dt_float(0.0)
    self.reward_illegal_ambiguous = None

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = -1000.0
    self.reward_max = 1000.0
    self.penalty_per_diff = -500.0

def __init__(self, min_pen_lte=0.0, max_pen_gte=1.0):
    BaseReward.__init__(self)
    self.reward_min = dt_float(0.0)
    self.reward_max = dt_float(1.0)
    self.min_pen_lte = dt_float(min_pen_lte)
    self.max_pen_gte = dt_float(max_pen_gte)

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = 0
    self.reward_max = 0
    self.rewards = {}

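The empty `rewards` dict above suggests a container that aggregates several sub-rewards. Here is a hedged sketch of one way such a combiner could work; the `add_reward` helper, the weighting scheme, and the class name are assumptions for illustration, not the library's API.

from grid2op.Reward import BaseReward
from grid2op.dtypes import dt_float


class WeightedSumReward(BaseReward):
    """Illustrative combiner: sums weighted sub-rewards (assumed design)."""

    def __init__(self):
        BaseReward.__init__(self)
        self.reward_min = dt_float(0.0)
        self.reward_max = dt_float(0.0)
        self.rewards = {}  # name -> (instance, weight)

    def add_reward(self, name, reward_instance, weight=1.0):
        # hypothetical helper: register a sub-reward and grow the bounds
        self.rewards[name] = (reward_instance, dt_float(weight))
        self.reward_min += dt_float(weight) * reward_instance.reward_min
        self.reward_max += dt_float(weight) * reward_instance.reward_max

    def initialize(self, env):
        for reward_instance, _ in self.rewards.values():
            reward_instance.initialize(env)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        return dt_float(sum(
            weight * reward_instance(action, env, has_error, is_done,
                                     is_illegal, is_ambiguous)
            for reward_instance, weight in self.rewards.values()
        ))
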
def __init__(self, alpha_redisph=5.0):
    BaseReward.__init__(self)
    self.reward_min = None
    self.reward_max = None
    self.max_regret = 0.
    self.alpha_redisph = alpha_redisph

def __init__(self, per_timestep=1):
    BaseReward.__init__(self)
    self.per_timestep = dt_float(per_timestep)
    self.total_time_steps = dt_float(0.0)
    self.reward_min = dt_float(0.)
    self.reward_max = dt_float(1.)

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = -1000.0
    self.reward_max = 1000.0

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = dt_float(0.0)
    self.reward_max = dt_float(1.0)
    self.worst_cost = None

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = dt_float(0.0)
    self.reward_max = dt_float(1.0)
    self.max_overflowed = dt_float(5.0)

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = dt_float(-1.0)
    self.reward_max = dt_float(1.0)

def __init__(self, max_lines=5):
    BaseReward.__init__(self)
    self.reward_min = dt_float(0.0)
    self.reward_max = dt_float(1.0)
    self.max_overflowed = dt_float(max_lines)

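A constructor like the one above (reward in [0, 1], saturating at `max_lines` overflowed lines) is typically paired with a `__call__` that counts lines at or over their thermal limit. Below is a minimal sketch, assuming grid2op's observation API (`env.get_obs()`, `obs.rho`); the exact counting and scaling rule is an assumption for illustration.

import numpy as np
from grid2op.Reward import BaseReward
from grid2op.dtypes import dt_float


class OverflowPenaltyReward(BaseReward):
    """Illustrative reward: 1.0 with no overflow, 0.0 at max_lines overflows."""

    def __init__(self, max_lines=5):
        BaseReward.__init__(self)
        self.reward_min = dt_float(0.0)
        self.reward_max = dt_float(1.0)
        self.max_overflowed = dt_float(max_lines)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if has_error or is_illegal or is_ambiguous:
            return self.reward_min
        obs = env.get_obs()
        nb_overflow = dt_float(np.sum(obs.rho >= 1.0))  # lines at/over their limit
        frac = min(nb_overflow / self.max_overflowed, dt_float(1.0))
        return self.reward_max - frac * (self.reward_max - self.reward_min)
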
def __init__(self, alpha_redisph=5.0):
    BaseReward.__init__(self)
    self.reward_min = None
    self.reward_max = None
    self.max_regret = dt_float(0.0)
    self.alpha_redisph = dt_float(alpha_redisph)

def __init__(self):
    BaseReward.__init__(self)
    self.reward_min = -1.0
    self.reward_illegal = -0.5
    self.reward_max = 1.0

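All of these constructors follow the same pattern: call `BaseReward.__init__`, then set the reward bounds and any class-specific parameters; the actual scoring happens in `__call__`. Below is a minimal end-to-end sketch mirroring the last constructor, assuming grid2op's `reward_class` argument to `grid2op.make`; the environment name and the scoring logic are placeholders, not the library's own implementation.

import grid2op
from grid2op.Reward import BaseReward
from grid2op.dtypes import dt_float


class GameplayLikeReward(BaseReward):
    """Illustrative reward: +1 while the grid survives, -0.5 for illegal or
    ambiguous actions, -1 on failure."""

    def __init__(self):
        BaseReward.__init__(self)
        self.reward_min = dt_float(-1.0)
        self.reward_illegal = dt_float(-0.5)
        self.reward_max = dt_float(1.0)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if has_error:
            return self.reward_min
        if is_illegal or is_ambiguous:
            return self.reward_illegal
        return self.reward_max


# placeholder environment name; any grid2op dataset would do
env = grid2op.make("l2rpn_case14_sandbox", reward_class=GameplayLikeReward)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space({}))  # do-nothing action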