class HouseRewardTestCase(unittest.TestCase):
    """
    Testing the reward function (house is the source of it).

    Note: 'reward' is used as the standard reinforcement learning
    name, but the function works as a penalty.
    """

    def setUp(self):
        """Create a house whose sensor readings equal the user requests."""
        self.house = House(timeframe=5)
        # presumably minutes since midnight (07:00, 19:00, 12:00) -
        # TODO confirm against House
        self.house.day_start = 7 * 60
        self.house.day_end = 24 * 60 - 5 * 60
        self.house.daytime = 12 * 60  # default

        # define perfect conditions (so reward should be zero)
        # NOTE(review): the energy cost is stubbed here under the name
        # `_calculate_energy_cost`, while
        # test_reward_decrease_with_energy_cost stubs
        # `_calculate_cost_and_update_energy_source`. Only the name that
        # House.reward() actually calls has any effect - confirm against
        # House and align the two.
        self.house._calculate_energy_cost = MagicMock(return_value=0)

        self.house.user_requests = {
            'temp_desired': 21,
            'temp_epsilon': 0.5,
            'light_desired': 0.7,
            'light_epsilon': 0.05,
        }

        self.house.inside_sensors = {
            'first': {
                'temperature': 21,
                'light': 0.7,
            }
        }

    def test_reward_for_perfect_conditions(self):
        """
        Reward should be zero as the factors are perfect
        """
        reward = self.house.reward()
        self.assertEqual(reward, 0,
                         "Reward should be zero, factors are perfect!")

    def test_reward_returns_nonpositive_values(self):
        """
        The reward in the simulator is modeled as a penalty.
        It shouldn't return positive values.
        """
        testing_pairs = (
            (-40, 0),
            (0, 0),
            (10, 0),
            (15, 0.02),
            (15, 0.5),
            (21, 0.4),
            (264, 0.99),
            (math.pi, 1),
        )

        for temp, light in testing_pairs:
            # subTest keeps the loop going after a failure and reports
            # exactly which (temperature, light) pair was rejected.
            with self.subTest(temperature=temp, light=light):
                self.house.inside_sensors = {
                    'first': {
                        'temperature': temp,
                        'light': light,
                    }
                }
                reward = self.house.reward()
                self.assertLessEqual(reward, 0,
                                     "Reward shouldn't be positive!")

    def test_reward_decrease_with_energy_cost(self):
        """
        Energy cost is the base parameter and with cost increase,
        penalty should be bigger. The same must hold when temperature
        or light move away from the requested values.
        """
        # Baseline is taken under the conditions prepared in setUp.
        reward = self.house.reward()

        with self.subTest(factor='energy cost'):
            self.house._calculate_cost_and_update_energy_source = \
                MagicMock(return_value=100)
            self.assertLess(
                self.house.reward(), reward,
                "Reward should decrease, cost parameter got worse!")

        # Reset the cost stub outside the sub-test so the remaining
        # scenarios are isolated even if the one above failed.
        self.house._calculate_cost_and_update_energy_source = \
            MagicMock(return_value=0)

        with self.subTest(factor='temperature'):
            self.house.inside_sensors = {
                'first': {
                    'temperature': 20,
                    'light': 0.7,
                }
            }
            self.assertLess(
                self.house.reward(), reward,
                "Reward should decrease, temperature got worse!")

        with self.subTest(factor='light'):
            self.house.inside_sensors = {
                'first': {
                    'temperature': 21,
                    'light': 0.4,
                }
            }
            self.assertLess(
                self.house.reward(), reward,
                "Reward should decrease, light got worse!")