def use_heuristic(HEURISTIC_TYPE='Perfekt-Pred-Heuristic', epochs=1, threshold_dem=50,
                  deactivate_SMS=False, deactivate_LION=False):

    # Naming the agent:
    now = datetime.now()
    NAME = str(round(threshold_dem))+'_TARGET_VALUE_'+HEURISTIC_TYPE+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 1000,
        SMS_Anschaffungs_Preis = 115000/3,
        SMS_max_Nutzungsjahre = 20,
        Leistungspreis = 102,
        # Setup logging tags:
        logging_list = ['cost_saving','exact_costs','sum_exact_costs','sum_cost_saving'],
        # Deactivation options for the batteries:
        deactivate_SMS = deactivate_SMS,
        deactivate_LION = deactivate_LION)

    # Load the environment:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = ['norm_total_power','normal','seq_max'],
        # Battery stats:
        max_SMS_SoC = 12/3,
        max_LION_SoC = 54,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # Heuristics can only use continuous values:
        ACTION_TYPE = 'contin',
        OBS_TYPE = 'contin',
        # Define heuristic usage:
        AGENT_TYPE = 'heuristic')

    # Use the complete dataset (no validation split):
    env.use_all_data()

    # Setup the agent:
    agent = heurisitc(
        env = env,
        HEURISTIC_TYPE = HEURISTIC_TYPE,
        threshold_dem = threshold_dem)

    return agent.calculate(epochs=epochs)
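# A minimal usage sketch (an illustrative invocation, not from the source):
# run the perfect-prediction heuristic once with a 50 kW target value.
if __name__ == '__main__':
    use_heuristic(HEURISTIC_TYPE='Perfekt-Pred-Heuristic', epochs=1, threshold_dem=50)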
def run_agent(name='', gamma=.9, lr=0.1, tau=0.15, update_num=500, epsilon_decay='linear',
              input_list=['norm_total_power','normal','seq_max'], hidden_size=256,
              pre_trained_model=None, target_update_num=None):
    '''
    Trains and tests a DQN based on the passed parameters.
    '''
    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_DQN_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs and steps:
    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # DQN inputs can be continuous, but outputs must be discrete:
        ACTION_TYPE = 'discrete',
        OBS_TYPE = 'contin',
        # Set the number of discrete values:
        discrete_space = 22,
        # Size of the validation data:
        val_split = 0.1)

    # Setup the agent:
    agent = DQN(
        env = env,
        memory_len = update_num,
        # Training parameters:
        gamma = gamma,
        epsilon = 0.99,
        epsilon_min = 0.1,
        epsilon_decay = epsilon_decay,
        lr = lr,
        tau = tau,
        activation = 'relu',
        loss = 'mean_squared_error',
        hidden_size = hidden_size,
        pre_trained_model = pre_trained_model,
        target_update_num = target_update_num)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with the dataset that includes the validation data:
    env.use_all_data()
    testing(agent)
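# Hedged usage sketch: a hypothetical sweep over two of the DQN hyperparameters
# exposed above (the value grids are illustrative, not from the source).
if __name__ == '__main__':
    for lr in [0.1, 0.01]:
        for tau in [0.15, 0.2]:
            run_agent(name='lr_{}_tau_{}'.format(lr, tau), lr=lr, tau=tau)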
# Number of warm-up steps:
num_warmup_steps = 100
# Train every x steps:
update_num = 100
# Number of epochs and steps:
epochs = 100

# Setup reward_maker:
r_maker = reward_maker(
    LOGGER=logger,
    # Settings:
    COST_TYPE='exact_costs',
    R_TYPE='savings_focus',
    R_HORIZON='single_step',
    # Parameters to calculate costs:
    cost_per_kwh=0.2255,
    LION_Anschaffungs_Preis=34100,
    LION_max_Ladezyklen=1000,
    SMS_Anschaffungs_Preis=115000 / 3,
    SMS_max_Nutzungsjahre=20,
    Leistungspreis=102)

# Setup common_env:
env = common_env(
    reward_maker=r_maker,
    df=df,
    power_dem_df=power_dem_df,
    # Dataset inputs for the states:
    input_list=['norm_total_power', 'normal', 'seq_max'],
    # Battery stats:
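# Back-of-envelope illustration of what the cost parameters above imply
# (an assumption about how `reward_maker` combines them, not its actual code):
# - energy:        cost_per_kwh euro per consumed kWh
# - demand charge: Leistungspreis euro per kW of annual peak load
# - battery wear:  purchase price spread over cycles (LION) or years (SMS)
lion_cost_per_cycle = 34100 / 1000      # ~34.1 euro per full LION charge cycle
sms_cost_per_year = (115000 / 3) / 20   # ~1916.7 euro per year of SMS usage
saving_per_shaved_kw = 102              # 1 kW less peak saves 102 euro per year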
def run_agent(name='', learning_rate=0.00025, gamma=0.99, n_steps=2500, ent_coef=0.01,
              vf_coef=0.5, cliprange=0.2, input_list=['norm_total_power','normal','seq_max'],
              norm=None):

    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_PPO2_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = cms.dataset_and_logger(NAME)

    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        # Agents from stable-baselines can't use multi-step rewards from our code,
        # so R_HORIZON can only be 'single_step':
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102,
        norm_range = norm)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # PPO can use continuous values:
        ACTION_TYPE = 'contin',
        OBS_TYPE = 'contin',
        # Tells the environment to produce standard GYM outputs, so agents from
        # stable-baselines (or any other RL library that uses gym) can be used:
        AGENT_TYPE = 'standart_gym',
        val_split = 0.1)

    # Create the vectorised environment:
    dummy_env = DummyVecEnv([lambda: env])

    # Callback:
    checkpoint_callback = CheckpointCallback(save_freq=100000,
                                             save_path=cms.__dict__['D_PATH']+'agent-models/',
                                             name_prefix=NAME)

    # Setup the model:
    model = PPO2(MlpPolicy, dummy_env, verbose=1,
                 tensorboard_log=cms.__dict__['D_PATH']+'agent-logs/',
                 learning_rate=learning_rate, gamma=gamma, n_steps=n_steps,
                 ent_coef=ent_coef, vf_coef=vf_coef, cliprange=cliprange) #, nminibatches=1
    #model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log=DATENSATZ_PATH+'LOGS/agent_logging',
    #             callback=checkpoint_callback, n_steps=2500)

    # Train:
    model.learn(total_timesteps=epochs*len(env.__dict__['df']), tb_log_name=NAME)
    model.save(cms.__dict__['D_PATH']+"agent-models/"+NAME)
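# Hypothetical invocation (illustrative only): train a PPO2 baseline with the
# default hyperparameters defined in the signature above.
if __name__ == '__main__':
    run_agent(name='ppo2_baseline')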
# Number of warm-up steps:
num_warmup_steps = 100
# Train every x steps:
update_num = 50
# Number of epochs and steps:
epochs = 100

# Setup reward_maker:
r_maker = reward_maker(
    LOGGER=logger,
    # Settings:
    COST_TYPE='exact_costs',
    R_TYPE='savings_focus',
    # R_HORIZON is now an int for the number of periods of the reward horizon:
    R_HORIZON=12,
    # Additionally, the multi-step strategy must be set:
    M_STRATEGY='sum_to_terminal',
    # Parameters to calculate costs:
    cost_per_kwh=0.2255,
    LION_Anschaffungs_Preis=34100,
    LION_max_Ladezyklen=1000,
    SMS_Anschaffungs_Preis=115000 / 3,
    SMS_max_Nutzungsjahre=20,
    Leistungspreis=102)

# Setup common_env:
env = common_env(
    reward_maker=r_maker,
    df=df,
    power_dem_df=power_dem_df,
    # Dataset inputs for the states:
    input_list=['norm_total_power', 'normal', 'seq_max'],
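# Sketch of what a 'sum_to_terminal' multi-step reward could mean for
# R_HORIZON=12 (an illustrative interpretation, not the reward_maker internals):
def sum_to_terminal_sketch(step_rewards, horizon=12):
    '''Assigns each step the plain sum of rewards up to the end of its horizon.'''
    return [sum(step_rewards[t:t+horizon]) for t in range(len(step_rewards))]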
def run_agent(name='', gamma=.9, lr=0.1, update_num=100, load_table=None,
              epsilon_decay='linear', input_list=['norm_total_power','normal','seq_max']):

    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_Q-Table_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs and steps:
    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # A Q-Table can only take discrete inputs and produce discrete outputs:
        ACTION_TYPE = 'discrete',
        OBS_TYPE = 'discrete',
        # Set the number of discrete values:
        discrete_space = 22,
        # Size of the validation data:
        val_split = 0.1)

    # Setup the agent:
    agent = Q_Learner(
        env = env,
        memory_len = update_num,
        # Training parameters:
        gamma = gamma,
        epsilon = 0.99,
        epsilon_min = 0.1,
        epsilon_decay = epsilon_decay,
        lr = lr,
        load_table = load_table)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with the dataset that includes the validation data:
    env.use_all_data()
    testing(agent)
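# Hedged usage sketch: continue training from a previously saved table
# ('my_q_table' is a hypothetical name, not from the source):
if __name__ == '__main__':
    run_agent(name='q_table_retrain', load_table='my_q_table')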
def use_heuristic(HEURISTIC_TYPE='Perfekt-Pred', test_name='', epochs=1, threshold_dem=50,
                  deactivate_SMS=False, deactivate_LION=False, num_past_periods=12,
                  num_outputs=12, TYPE_LIST=['NORMAL'], seq_transform=['MAX'],
                  max_SMS_SoC=25, max_LION_SoC=54, no_pred=False):

    # Naming the agent:
    now = datetime.now()
    if test_name != 'Configurations':
        NAME = 'heuristic_'+test_name+'_'+HEURISTIC_TYPE+'_'+str(round(threshold_dem))+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")
    else:
        if deactivate_SMS == False:
            SMS_string = 'SMS'
        else:
            SMS_string = 'None'
        if deactivate_LION == False:
            LION_string = 'LION'
        else:
            LION_string = 'None'
        NAME = str(round(threshold_dem))+'-'+LION_string+'-'+SMS_string
        NAME = 'heuristic_'+test_name+'_'+HEURISTIC_TYPE+'_'+NAME+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, input_list = load_specific_sets(num_past_periods, num_outputs, TYPE_LIST, seq_transform, no_pred)
    logger, period_min = load_logger(NAME, only_per_episode=False)

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102,
        # Setup logging tags:
        logging_list = ['cost_saving','exact_costs','sum_exact_costs','sum_cost_saving'],
        # Deactivation options for the batteries:
        deactivate_SMS = deactivate_SMS,
        deactivate_LION = deactivate_LION)

    # Load the environment:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = max_SMS_SoC, #/1.2
        max_LION_SoC = max_LION_SoC, #/1.2
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # Heuristics can only use continuous values:
        ACTION_TYPE = 'contin',
        OBS_TYPE = 'contin',
        # Define heuristic usage:
        AGENT_TYPE = 'heuristic',
        val_split = 0)

    # Use the complete dataset (no validation split):
    #env.use_all_data()

    # Setup the agent:
    agent = heurisitc(
        env = env,
        HEURISTIC_TYPE = HEURISTIC_TYPE,
        threshold_dem = threshold_dem)

    return agent.calculate(epochs=epochs, LSTM_column=input_list[-1])
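# Hypothetical call comparing battery configurations, mirroring the
# 'Configurations' naming branch above (the parameter grid is illustrative):
if __name__ == '__main__':
    for LION_off, SMS_off in [(False, False), (False, True), (True, False)]:
        use_heuristic(test_name='Configurations', threshold_dem=50,
                      deactivate_LION=LION_off, deactivate_SMS=SMS_off)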
def use_heuristic(HEURISTIC_TYPE='Perfekt-Pred-Heuristic', epochs=1, threshold_dem=50,
                  deactivate_SMS=True, deactivate_LION=True):

    # Naming the agent and setting up the directory path:
    now = datetime.now()
    NAME = str(round(threshold_dem)) + '_NO_BATTERY_' + HEURISTIC_TYPE + now.strftime("_%d-%m-%Y_%H-%M-%S")
    D_PATH = '_small_d/'

    # Load the dataset:
    main_dataset = mainDataset(D_PATH=D_PATH, period_string_min='15min', full_dataset=True)

    # Normalized dataframe:
    df = main_dataset.make_input_df(drop_main_terminal=False, use_time_diff=True, day_diff='holiday-weekend')

    # Sum of the power demand dataframe (not normalized):
    power_dem_df = main_dataset.load_total_power()[24:-12]

    # Load the LSTM input dataset:
    lstm_dataset = lstmInputDataset(main_dataset, df, num_past_periods=12)

    # Making predictions:
    normal_predictions = wahrsager(lstm_dataset, power_dem_df, TYPE='NORMAL').pred()[:-12]
    seq_predictions = wahrsager(lstm_dataset, power_dem_df, TYPE='SEQ', num_outputs=12).pred()

    # Adding the predictions to the dataset:
    df = df[24:-12]
    df['normal'] = normal_predictions
    df['seq_max'] = max_seq(seq_predictions)

    logger = Logger(NAME, D_PATH)

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER=logger,
        COST_TYPE='exact_costs',
        R_TYPE='savings_focus',
        R_HORIZON='single_step',
        cost_per_kwh=0.2255,
        LION_Anschaffungs_Preis=34100,
        LION_max_Ladezyklen=1000,
        SMS_Anschaffungs_Preis=115000 / 3,
        SMS_max_Nutzungsjahre=20,
        Leistungspreis=102,
        logging_list=['cost_saving', 'exact_costs', 'sum_exact_costs', 'sum_cost_saving'],
        deactivate_SMS=deactivate_SMS,
        deactivate_LION=deactivate_LION)

    # Load the environment:
    env = common_env(
        reward_maker=r_maker,
        df=df,
        power_dem_df=power_dem_df,
        input_list=['norm_total_power', 'normal', 'seq_max'],
        max_SMS_SoC=12 / 3,
        max_LION_SoC=54,
        PERIODEN_DAUER=15,
        ACTION_TYPE='contin',
        OBS_TYPE='contin',
        AGENT_TYPE='heuristic')

    # Setup the agent:
    agent = heurisitc(env=env, HEURISTIC_TYPE=HEURISTIC_TYPE, threshold_dem=threshold_dem)

    return agent.calculate(epochs=epochs)
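# Illustrative invocation: both batteries are deactivated by default here, so
# this computes the no-battery baseline costs for a 50 kW target value:
if __name__ == '__main__':
    use_heuristic(threshold_dem=50)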