Code example #1
    def test_save_callback(self):
        '''
        Test that the model performance can be monitored and that results 
        can be checked and saved as the model improves. This test trains an 
        agent for a short period of time, without loading a pre-trained 
        model. Therefore, this test also checks that an RL agent from 
        stable-baselines can be trained.
        
        '''
        # Define logging directory. Monitoring data and agent model will be stored here
        log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents',
                               'monitored_A2C')

        # Perform a short training example with callback
        env, _, _ = run_save_callback.train_A2C_with_callback(
            log_dir=log_dir, tensorboard_log=None)

        # Load the trained agent
        model = A2C.load(os.path.join(log_dir, 'best_model'))

        # Test one step with the trained model
        obs = env.reset()
        df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'save_callback.csv')
        self.compare_ref_values_df(df, ref_filepath)

        # Remove the trained model so that it does not interfere with further testing
        shutil.rmtree(log_dir, ignore_errors=True)
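
These tests rely on the helper compare_ref_values_df from the repository's testing utilities, whose implementation is not included in these snippets. The following is only a minimal sketch of the assumed behavior (reference CSV loading plus a tolerance-based comparison; names and the tolerance are illustrative):

import numpy as np
import pandas as pd

def compare_ref_values_df(self, df, ref_filepath):
    '''Sketch only: compare a DataFrame of values against a stored
    reference CSV. The actual utility in the repository may differ.
    
    '''
    # Load the stored reference with the same index name as `df`
    df_ref = pd.read_csv(ref_filepath, index_col=df.index.name)
    # Compare element-wise within an (assumed) numeric tolerance
    assert np.allclose(df['value'].values, df_ref['value'].values, atol=1e-3)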
Code example #2
def train_PPO2_predictive(start_time_tests    = [31*24*3600, 304*24*3600], 
                          episode_length_test = 14*24*3600, 
                          load                = False):
    '''Method to train (or load a pre-trained) PPO2 agent. The testing 
    periods need to be specified here so that they can be excluded from 
    training. 
    
    Parameters
    ----------
    start_time_tests : list of integers
        Time in seconds from the beginning of the year that will be used 
        for testing. These periods should be excluded in the training 
        process. By default the first day of February and the first day of
        November are used. 
    episode_length_test : integer
        Number of seconds indicating the length of the testing periods. By
        default two weeks are reserved for testing. 
    load : boolean
        Boolean indicating whether a pre-trained agent is loaded (True) 
        or the agent needs to be trained (False). 
     
    '''
    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append((start_time_test,start_time_test+episode_length_test))
    # Summer period (from June 21st till September 22nd). 
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173*24*3600, 266*24*3600))  
    
    env = BoptestGymEnvRewardWeightCost(url                   = url,
                                        actions               = ['oveHeaPumY_u'],
                                        observations          = {'reaTZon_y':   (280.,310.),
                                                                 'LowerSetp[1]':(280.,310.),
                                                                 'UpperSetp[1]':(280.,310.),
                                                                 'TDryBul':     (250.,310.),
                                                                 'HGloHor':     (0.,  1000.)}, 
                                        random_start_time     = True,
                                        excluding_periods     = excluding_periods,
                                        forecasting_period    = 1*24*3600,
                                        max_episode_length    = 1*24*3600,
                                        warmup_period         = 3*3600,
                                        Ts                    = 900)
    
    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)  
    
    model = PPO2('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed,
                tensorboard_log=os.path.join('results'))
    
    if not load: 
        model.learn(total_timesteps=int(1e5))
        # Save the agent
        model.save(os.path.join(utilities.get_root_path(), 'examples',
                                'agents', 'ppo2_pred_bestest_hydronic_heatpump'))
    else:
        # Load the trained agent
        model = PPO2.load(os.path.join(utilities.get_root_path(), 'examples',
                                      'agents', 'ppo2_pred_bestest_hydronic_heatpump'))
    
    return env, model, start_time_tests
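
As an illustration of how this function might be used (the call below is a sketch, not taken from the repository; test_feb appears later in these snippets and the warmup period chosen here is an assumption):

# Illustrative usage sketch: load the pre-trained predictive PPO2 agent and
# evaluate it on the February testing period. Argument values are assumptions.
env, model, start_time_tests = train_PPO2_predictive(load=True)
test_feb(env, model, start_time_tests,
         episode_length_test=14*24*3600,
         warmup_period_test=1*24*3600,
         plot=True)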
Code example #3
    def test_variable_episode(self):
        '''
        Test that a model can be trained using variable episode length. 
        The method that is used to determine whether the episode is 
        terminated or not is defined by the user. This test trains an agent
        for a short period of time, without loading a pre-trained model. 
        Therefore, this test also checks that an RL agent from 
        stable-baselines can be trained. This test also uses the save 
        callback to check that the variable episode length is effectively 
        being used. 
        Notice that this test also checks that child classes can be nested
        since the example redefines the `compute_reward` and the 
        `compute_done` methods. 
        
        '''
        # Define logging directory. Monitoring data and agent model will be stored here
        log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents',
                               'variable_episode_A2C')

        # Perform a short training example with callback
        env, _, _ = run_variable_episode.train_A2C_with_variable_episode(
            log_dir=log_dir, tensorboard_log=None)

        # Load the trained agent
        model = A2C.load(os.path.join(log_dir, 'best_model'))

        # Test one step with the trained model
        obs = env.reset()
        df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'variable_episode_step.csv')
        self.compare_ref_values_df(df, ref_filepath)

        # Check variable lengths
        monitor = pd.read_csv(os.path.join(log_dir, 'monitor.csv'),
                              index_col=None)
        monitor = monitor.iloc[1:]
        monitor.reset_index(inplace=True)
        monitor.columns = ['reward', 'episode_length', 'time']

        # Time may vary from one computer to another
        monitor.drop(labels='time', axis=1, inplace=True)

        # Utilities require the index to be named 'time' (even though that is not the case here)
        monitor.index.name = 'time'

        # Transform to numeric
        monitor = monitor.apply(
            lambda col: pd.to_numeric(col, errors='coerce'))

        # Check that we always obtain the same monitoring parameters
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references',
                                    'variable_episode_monitoring.csv')
        self.compare_ref_timeseries_df(monitor, ref_filepath)

        # Remove the trained model so that it does not interfere with further testing
        shutil.rmtree(log_dir, ignore_errors=True)
Code example #4
    def check_obs_act_rew_kpi(self,
                              obs=None,
                              act=None,
                              rew=None,
                              kpi=None,
                              label='default'):
        '''Auxiliary method to check the observations, actions, rewards, 
        and/or KPIs of a particular test case run. 
        
        '''

        # Check observation values
        if obs is not None:
            df = pd.DataFrame(obs)
            df.index.name = 'time'
            ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                        'references',
                                        'observations_{}.csv'.format(label))
            self.compare_ref_timeseries_df(df, ref_filepath)

        # Check actions values
        if act is not None:
            df = pd.DataFrame(act)
            df.index.name = 'time'
            ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                        'references',
                                        'actions_{}.csv'.format(label))
            self.compare_ref_timeseries_df(df, ref_filepath)

        # Check reward values
        if rew is not None:
            df = pd.DataFrame(rew)
            df.index.name = 'time'
            ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                        'references',
                                        'rewards_{}.csv'.format(label))
            self.compare_ref_timeseries_df(df, ref_filepath)

        if kpi is not None:
            df = pd.DataFrame(data=[kpi]).T
            df.columns = ['value']
            df.index.name = 'keys'
            # Time ratio is not checked since it depends on the machine where the tests are run
            df.drop('time_rat', inplace=True)
            ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                        'references',
                                        'kpis_{}.csv'.format(label))
            self.compare_ref_values_df(df, ref_filepath)
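
A typical call site for this helper is inside one of the agent tests, after running a trained agent through a testing period. The sketch below is only illustrative: the full signature of test_agent is not shown in these snippets, so its keyword arguments and the 'feb' label are assumptions.

# Illustrative sketch: run the trained agent over the February test period
# and compare trajectories and KPIs against the stored references.
# The keyword arguments of test_agent and the 'feb' label are assumptions.
observations, actions, rewards, kpis = test_agent(env, model,
                                                  start_time=31*24*3600,
                                                  episode_length=14*24*3600,
                                                  warmup_period=1*24*3600,
                                                  plot=False)
self.check_obs_act_rew_kpi(obs=observations,
                           act=actions,
                           rew=rewards,
                           kpi=kpis,
                           label='feb')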
Code example #5
    def test_behavior_cloning_disc(self):
        '''Check that an agent using a discrete action space (in this case
        we use DQN) can be pretrained using behavior cloning from an 
        expert trajectory that needs to be generated beforehand. The test
        pretrains the agent with 1000 epochs and directly tests its 
        performance without further learning. 
        
        '''
        expert_traj = os.path.join(utilities.get_root_path(), 'examples',
                                   'trajectories', 'expert_traj_disc_28.npz')
        self.partial_test_RL(case='D',
                             algorithm='DQN',
                             mode='train',
                             training_timesteps=0,
                             expert_traj=expert_traj)
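
The expert trajectory file referenced above has to exist beforehand. With stable-baselines 2.x such a file can be produced with generate_expert_traj and consumed through an ExpertDataset for behavior cloning. The sketch below only illustrates that general pattern and is not the repository's own generation script; paths, episode counts and epoch numbers are assumptions.

from stable_baselines import DQN
from stable_baselines.gail import ExpertDataset, generate_expert_traj

# Record trajectories of an expert policy (here simply a DQN model) into
# a .npz file. Episode count and file name are illustrative.
expert_model = DQN('MlpPolicy', env, verbose=1)
generate_expert_traj(expert_model, 'expert_traj_disc', env=env, n_episodes=10)

# Pretrain a fresh agent with behavior cloning from the recorded data
dataset = ExpertDataset(expert_path='expert_traj_disc.npz', traj_limitation=-1)
model = DQN('MlpPolicy', env, verbose=1)
model.pretrain(dataset, n_epochs=1000)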
Code example #6
    def test_reset_fixed(self):
        '''Test that the environment can reset using a fixed start time
        and a specific warmup period. 
        
        '''

        self.env.random_start_time = False
        self.env.start_time = 14 * 24 * 3600
        self.env.warmup_period = 3 * 3600

        obs = self.env.reset()

        # Check values
        df = pd.DataFrame(data=[obs],
                          index=['obs_reset_fixed'],
                          columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'reset_fixed.csv')
        self.compare_ref_values_df(df, ref_filepath)
Code example #7
    def test_reset_random(self):
        '''Test that the environment can reset using a random start time
        that is out of the specified `excluding_periods`. This test also
        checks that the seed for random initialization works properly. 
        
        '''

        self.env.random_start_time = True
        self.env.warmup_period = 1 * 3600
        # Set the excluding periods to be the first two weeks of February
        # and the first two weeks of November
        excluding_periods = [(31 * 24 * 3600, 31 * 24 * 3600 + 14 * 24 * 3600),
                             (304 * 24 * 3600,
                              304 * 24 * 3600 + 14 * 24 * 3600)]
        self.env.excluding_periods = excluding_periods
        random.seed(123456)
        start_times = OrderedDict()
        # Reset a hundred times
        for i in range(100):
            obs = self.env.reset()
            start_time = self.env.start_time
            episode = (start_time, start_time + self.env.max_episode_length)
            for period in excluding_periods:
                # Make sure that the episodes don't overlap with excluding_periods
                assert not(episode[0] < period[1] and period[0] < episode[1]),\
                        'reset is not working properly when generating random times. '\
                        'The episode with starting time {0} and end time {1} '\
                        'overlaps with period {2}. This corresponds to the '\
                        'generated starting time number {3}.'\
                        ''.format(start_time,start_time+self.env.max_episode_length,period,i)
            start_times[start_time] = obs

        # Check values
        df = pd.DataFrame.from_dict(start_times,
                                    orient='index',
                                    columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'reset_random.csv')
        self.compare_ref_values_df(df, ref_filepath)
Code example #8
def train_A2C_with_callback(start_time_tests    = [31*24*3600, 304*24*3600], 
                            episode_length_test = 14*24*3600,
                            log_dir = os.path.join(utilities.get_root_path(), 
                                'examples', 'agents', 'monitored_A2C'),
                            tensorboard_log     = os.path.join('results')):
    '''Method to train an A2C agent using a callback to save the model 
    upon performance improvement.  
    
    Parameters
    ----------
    start_time_tests : list of integers
        Time in seconds from the beginning of the year that will be used 
        for testing. These periods should be excluded in the training 
        process. By default the first day of February and the first day of
        November are used. 
    episode_length_test : integer
        Number of seconds indicating the length of the testing periods. By
        default two weeks are reserved for testing.  
    log_dir : string
        Directory where monitoring data and best trained model are stored.
    tensorboard_log : path
        Path to the directory where the tensorboard logs are stored.
    
    '''
    
    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append((start_time_test,start_time_test+episode_length_test))
    # Summer period (from June 21st till September 22nd). 
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173*24*3600, 266*24*3600))  
    
    # Use episodes of only one hour to trigger the callback checks more often
    env = BoptestGymEnvRewardWeightCost(url                   = url,
                                        actions               = ['oveHeaPumY_u'],
                                        observations          = {'reaTZon_y':(280.,310.)}, 
                                        random_start_time     = True,
                                        excluding_periods     = excluding_periods,
                                        max_episode_length    = 1*3600, 
                                        warmup_period         = 3*3600,
                                        step_period           = 900)
    
    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)  
    
    os.makedirs(log_dir, exist_ok=True)
    
    # Modify the environment to include the callback
    env = Monitor(env=env, filename=os.path.join(log_dir,'monitor.csv'))
    
    # Create the callback: check every 10 steps. We keep it very short for testing 
    callback = SaveOnBestTrainingRewardCallback(check_freq=10, log_dir=log_dir)
    
    # Initialize the agent
    model = A2C('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed,
                tensorboard_log=tensorboard_log)
    
    # Train the agent with callback for saving
    model.learn(total_timesteps=int(100), callback=callback)
    
    return env, model, start_time_tests
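
SaveOnBestTrainingRewardCallback is imported from elsewhere in the repository and is not shown in these snippets. A sketch in the spirit of the stable-baselines 2.x documentation example is given below; the implementation actually used here may differ in its details.

import os
import numpy as np
from stable_baselines.common.callbacks import BaseCallback
from stable_baselines.results_plotter import load_results, ts2xy

class SaveOnBestTrainingRewardCallback(BaseCallback):
    '''Sketch of a callback that checks the monitored training reward every
    `check_freq` steps and saves the model whenever the mean episode reward
    improves. Based on the stable-baselines documentation example.
    
    '''
    def __init__(self, check_freq, log_dir, verbose=1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            # Retrieve the episode rewards recorded by the Monitor wrapper
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model.save(self.save_path)
        return True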
Code example #9
def train_A2C_with_variable_episode(
    start_time_tests=[31 * 24 * 3600, 304 * 24 * 3600],
    episode_length_test=14 * 24 * 3600,
    log_dir=os.path.join(utilities.get_root_path(), 'examples', 'agents',
                         'variable_episode_A2C'),
    tensorboard_log=os.path.join('results')):
    '''Method to train an A2C agent with a variable episode length, using 
    a callback to save the model upon performance improvement.  
    
    Parameters
    ----------
    start_time_tests : list of integers
        Time in seconds from the beginning of the year that will be used 
        for testing. These periods should be excluded in the training 
        process. By default the first day of February and the first day of
        November are used. 
    episode_length_test : integer
        Number of seconds indicating the length of the testing periods. By
        default two weeks are reserved for testing.    
    log_dir : string
        Directory where monitoring data and best trained model are stored.
    tensorboard_log : path
        Path to the directory where the tensorboard logs are stored.
    
    '''

    # Define custom child class:
    class BoptestGymEnvVariableEpisodeLength(BoptestGymEnvRewardWeightCost):
        '''BOPTEST gym environment that inherits the cost-weighted reward 
        function from `BoptestGymEnvRewardWeightCost` and redefines the 
        `compute_done` method to enable a variable episode length. 
        
        '''
        def compute_done(self,
                         res,
                         reward=None,
                         objective_integrand_threshold=0.1):
            '''Custom method that marks the episode as done not only when 
            the maximum episode length is exceeded, but also when the 
            objective integrand exceeds a certain threshold. The latter is 
            useful to terminate early the agent strategies that do not work, 
            avoiding unnecessary steps and improving sampling efficiency. 
            
            Returns
            -------
            done: boolean
                Boolean indicating whether the episode is done or not.  
            
            '''

            done =  (res['time'] >= self.start_time + self.max_episode_length)\
                    or \
                    (self.objective_integrand >= objective_integrand_threshold)

            return done

    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append(
            (start_time_test, start_time_test + episode_length_test))
    # Summer period (from June 21st till September 22nd).
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173 * 24 * 3600, 266 * 24 * 3600))

    # Use a max_episode_length of only six hours to trigger the callback checks more often
    env = BoptestGymEnvVariableEpisodeLength(
        url=url,
        actions=['oveHeaPumY_u'],
        observations={'reaTZon_y': (280., 310.)},
        random_start_time=True,
        excluding_periods=excluding_periods,
        max_episode_length=6 * 3600,
        warmup_period=3 * 3600,
        step_period=900)

    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)

    os.makedirs(log_dir, exist_ok=True)

    # Modify the environment to include the callback
    env = Monitor(env=env, filename=os.path.join(log_dir, 'monitor.csv'))

    # Create the callback: check every 10 steps. We keep it very short for testing
    callback = SaveOnBestTrainingRewardCallback(check_freq=10, log_dir=log_dir)

    # Initialize the agent
    model = A2C('MlpPolicy',
                env,
                verbose=1,
                gamma=0.99,
                seed=seed,
                tensorboard_log=tensorboard_log)

    # Train the agent with callback for saving
    model.learn(total_timesteps=int(100), callback=callback)

    return env, model, start_time_tests
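
After training, the effect of the variable episode length can be inspected in the monitor.csv written by the Monitor wrapper (a JSON info line followed by columns r, l and t for episode reward, episode length in steps, and elapsed time). A minimal check, assuming the default log directory, might look like the following sketch:

import os
import pandas as pd

# Sketch only: verify that early termination actually shortens episodes.
# With max_episode_length = 6*3600 s and step_period = 900 s, an episode
# can be at most 24 steps long; early-terminated episodes are shorter.
env, model, _ = train_A2C_with_variable_episode(tensorboard_log=None)
log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents',
                       'variable_episode_A2C')
# Skip the JSON info line that the Monitor wrapper writes first
monitor = pd.read_csv(os.path.join(log_dir, 'monitor.csv'), skiprows=1)
assert (monitor['l'] <= 6*3600//900).all()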
Code example #10
    expert_traj : string
        Path to expert trajectory in .npz format. If not None, the agent 
        will be pretrained using behavior cloning with these data. 
        
    '''
    
    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append((start_time_test,
                                  start_time_test+episode_length_test))
    # Summer period (from June 21st till September 22nd). 
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173*24*3600, 266*24*3600))  
    
    # Create a log directory
    log_dir = os.path.join(utilities.get_root_path(), 'examples', 
        'agents', '{}_{}_{:.0e}_logdir'.format(algorithm,case,training_timesteps))
    log_dir = log_dir.replace('+', '')
    os.makedirs(log_dir, exist_ok=True)
    
    # Redefine reward function
    class BoptestGymEnvCustomReward(BoptestGymEnv):
        '''Define a custom reward for this building
        
        '''
        def compute_reward(self):
            '''Custom reward function
            
            '''
            
            # Compute BOPTEST core kpis
Code example #11
                                            random_start_time     = True,
                                            excluding_periods     = excluding_periods,
                                            max_episode_length    = 1*24*3600,
                                            warmup_period         = 1*24*3600,
                                            step_period           = 900)    
    
    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)  
    
    model = A2C('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed,
                tensorboard_log=tensorboard_log)
    
    if not load: 
        model.learn(total_timesteps=int(1e5))
        # Save the agent
        model.save(os.path.join(utilities.get_root_path(), 'examples',
                                'agents', 'a2c_{}'.format(case)))
    else:
        # Load the trained agent
        model = A2C.load(os.path.join(utilities.get_root_path(), 'examples',
                                      'agents', 'a2c_{}'.format(case)))
    
    return env, model, start_time_tests
        
def test_feb(env, model, start_time_tests, 
             episode_length_test, warmup_period_test, plot=False):
    ''' Perform test in February
    
    '''

    observations, actions, rewards, kpis = test_agent(env, model, 
Code example #12
    render : boolean
        If True, every episode is rendered while training.
        
    '''

    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append(
            (start_time_test, start_time_test + episode_length_test))
    # Summer period (from June 21st till September 22nd).
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173 * 24 * 3600, 266 * 24 * 3600))

    # Create a log directory
    log_dir = os.path.join(
        utilities.get_root_path(), 'examples', 'agents',
        '{}_{}_{:.0e}_logdir'.format(algorithm, case, training_timesteps))
    log_dir = log_dir.replace('+', '')
    os.makedirs(log_dir, exist_ok=True)

    # Redefine reward function
    class BoptestGymEnvCustomReward(BoptestGymEnv):
        '''Define a custom reward for this building
        
        '''
        def compute_reward(self):
            '''Custom reward function
            
            '''

            # Compute BOPTEST core kpis
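
Code examples #10 and #12 break off right after the comment about computing the BOPTEST core KPIs. For orientation only, a custom reward along those lines might look like the following sketch; the KPI weighting, the accumulation in self.objective_integrand, and the exact format of the /kpi response (which depends on the BOPTEST version) are assumptions rather than the repository's exact formulation.

import requests

class BoptestGymEnvCustomReward(BoptestGymEnv):
    '''Sketch of a custom reward for this building (illustrative only). 
    
    '''
    def compute_reward(self):
        '''Custom reward function (sketch). 
        
        '''
        
        # Compute BOPTEST core kpis
        kpis = requests.get('{0}/kpi'.format(self.url)).json()
        
        # Weighted sum of total operational cost and thermal discomfort
        # (the weights w_cost and w_tdis are assumptions)
        w_cost, w_tdis = 1., 1.
        objective_integrand = w_cost*kpis['cost_tot'] + w_tdis*kpis['tdis_tot']
        
        # Reward is the negative increase of the objective since the last step
        reward = -(objective_integrand - self.objective_integrand)
        self.objective_integrand = objective_integrand
        
        return reward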