Example #1
import sys

import numpy as np

# House, Agent, EnergyEnvironment, utils and plots are project-specific
# modules assumed to be importable from this codebase.


def main_function(location, num_of_panels, num_of_turbines, num_of_batteries):
    # Get arguments
    if len(sys.argv) > 1:
        episodes_num = int(sys.argv[1])
    else:
        episodes_num = 2000

    # House dependent parameters
    # location = 'California'
    # num_of_panels = 30   # Number of 250-watts solar panels
    # num_of_turbines = 2  # Number of 400 KW wind turbines
    # num_of_batteries = 2

    house = House(location, num_of_panels, num_of_turbines, num_of_batteries)

    # Main dependent parameters
    num_of_months = 12
    num_of_days = 30  # number of days per episode
    num_time_states = 4
    epsilon = 0.5
    alpha = 0.8

    # Initiate Agent
    agent = Agent()
    Q = agent.initialize_Q()
    avg_Q_old = np.mean(Q)

    # For printing and plots
    print_iteration = 50
    # print_flag turns on per-episode logging every `print_iteration` episodes
    print_flag = False

    # Per-episode totals and per-source running averages used for plotting
    rList = []            # total reward per episode (learning curve)
    solarList = []        # average daily solar energy, logged every print_iteration episodes
    windList = []         # average daily wind energy
    ffList = []           # average daily fossil-fuel (grid) energy
    battstorageList = []  # average battery charge at the end of each day
    battusedList = []     # average daily battery energy used
    energyList = []       # collects the series above for the energy-evolution plot

    # Per-day totals within the current logging window (reset after each log)
    solarSubList = []
    windSubList = []
    ffSubList = []
    battstorageSubList = []
    battusedSubList = []

    # Final-episode energy breakdown, grouped by time-of-day state
    final_itr = []
    final_list = []

    final_solar = []
    solar_dict = {0: [], 1: [], 2: [], 3: []}

    final_wind = []
    wind_dict = {0: [], 1: [], 2: [], 3: []}

    final_ff = []
    ff_dict = {0: [], 1: [], 2: [], 3: []}

    final_battery = []
    battery_dict = {0: [], 1: [], 2: [], 3: []}

    ## for realtime plotting
    # fig, ax = plt.subplots()
    # ax.set_ylabel("Energy (kWh)")
    # ax.set_title("Evolution of Energy Use")

    for itr in range(episodes_num):
        if itr % print_iteration == 0:
            print_flag = True

        # The house stays constant for every episode
        env = EnergyEnvironment(house)
        cur_state = env.state
        total_reward = 0

        solar_avg = 0
        wind_avg = 0
        ff_avg = 0
        batt_storage_avg = 0
        batt_used_avg = 0

        # for month in range(num_of_months):
        #     env.state[env.month_index] = month

        for day in range(num_of_days):
            total_solar_energy = 0
            total_wind_energy = 0
            total_grid_energy = 0
            total_battery_used = 0

            for i in range(num_time_states):
                action, cur_state_index, action_index = agent.get_action(
                    cur_state, Q, epsilon)
                reward, next_state = env.step(action, cur_state)

                Q = agent.get_Q(action, cur_state, Q, epsilon, cur_state_index,
                                action_index, reward, alpha)

                cur_state = next_state
                total_reward += reward

                # accumulate the day's totals from each energy source
                total_solar_energy += env.solar_energy
                total_wind_energy += env.wind_energy
                total_grid_energy += env.grid_energy
                total_battery_used += env.battery_used

                if itr == (episodes_num - 1):
                    solar_dict[i].append(env.solar_energy)
                    wind_dict[i].append(env.wind_energy)
                    ff_dict[i].append(env.grid_energy)
                    battery_dict[i].append(env.battery_used)

            # record the battery's charge level at the end of each day
            total_battery_stored = env.battery_energy

            # save total daily energy produced from different sources
            solarSubList.append(total_solar_energy)
            windSubList.append(total_wind_energy)
            ffSubList.append(total_grid_energy)
            battstorageSubList.append(total_battery_stored)
            battusedSubList.append(total_battery_used)

            solar_avg = np.mean(solarSubList)
            wind_avg = np.mean(windSubList)
            ff_avg = np.mean(ffSubList)
            batt_storage_avg = np.mean(battstorageSubList)
            batt_used_avg = np.mean(battusedSubList)

        if print_flag:
            avg_Q_new = np.mean(Q)
            avg_Q_change = abs(avg_Q_new - avg_Q_old)
            utils.print_info(itr, env, solar_avg, wind_avg, ff_avg,
                             batt_storage_avg, batt_used_avg, avg_Q_change)
            avg_Q_old = avg_Q_new
            solarList.append(solar_avg)
            windList.append(wind_avg)
            ffList.append(ff_avg)
            battstorageList.append(batt_storage_avg)
            battusedList.append(batt_used_avg)

            # plt.ion()
            # plots.real_time_plot([[solar_avg], [wind_avg], [ff_avg],
            #                                [batt_storage_avg], [batt_used_avg]],
            #                     colors=['b', 'g', 'r', 'purple', 'gray'],
            #                     legends=["Solar Energy", "Wind Energy", "Fossil Fuel Energy", "Battery Storage",
            #                              "Battery Usage"], ax=ax)

            solarSubList = []
            windSubList = []
            ffSubList = []
            battstorageSubList = []
            battusedSubList = []

        print_flag = False

        # total reward per episode, appended for learning-curve visualization
        rList.append(total_reward)

        # decay the exploration rate (epsilon) and learning rate (alpha) slightly each episode
        epsilon = max(0, epsilon - 0.0005)
        alpha = max(0, alpha - 0.0005)

    # plt.close()
    print("Score over time: " + str(sum(rList) / episodes_num))
    print("Q-values:", Q)

    plots.plot_learning_curve(rList)

    for i in range(num_time_states):
        final_solar.append(np.mean(solar_dict[i]))
        final_wind.append(np.mean(wind_dict[i]))
        final_ff.append(np.mean(ff_dict[i]))
        final_battery.append(np.mean(battery_dict[i]))

    energyList.append(solarList)
    energyList.append(windList)
    energyList.append(ffList)
    # energyList.append(battstorageList)
    energyList.append(battusedList)

    final_itr.append(final_solar)
    final_itr.append(final_wind)
    final_itr.append(final_ff)
    final_itr.append(final_battery)

    # plots.multiBarPlot_final(list(range(4)), final_itr, colors=['b', 'g', 'r', 'purple', 'gray'], ylabel="Energy (kWh)",
    #              title="Final Iteration of Energy Use", legends=["Solar Energy",  "Wind Energy", "Fossil Fuel Energy", "Battery Storage", "Battery Usage"])
    #
    # plots.multiBarPlot(list(range(len(solarList))), energyList, colors=['b', 'g', 'r', 'purple', 'gray'], ylabel="Energy (kWh)",
    #              title="Evolution of Energy Use", legends=["Solar Energy",  "Wind Energy", "Fossil Fuel Energy", "Battery Storage", "Battery Usage"])

    return list(range(len(solarList))), energyList, list(
        range(len(final_solar))), final_itr, list(range(len(rList))), rList
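
A minimal usage sketch (assuming the project modules behind House, Agent, EnergyEnvironment, utils and plots are importable, and reusing the example house parameters from the commented-out block near the top of the function; the driver below is hypothetical, not part of the original file):

if __name__ == '__main__':
    # Hypothetical driver: 30 solar panels, 2 wind turbines, 2 batteries in California.
    energy_x, energy_series, final_x, final_series, reward_x, rewards = \
        main_function('California', 30, 2, 2)

    # energy_series holds the solar, wind, fossil-fuel and battery-used averages
    # logged every print_iteration episodes; rewards is the per-episode total
    # reward that feeds the learning curve.
    print('Episodes run:', len(rewards))
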
Example #2
terminal = True
eval_rewards = []
evaluate_frame_number = 0

for frame in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)
        life_lost = True
        episode_reward_sum = 0
        terminal = False

    # Breakout requires a "fire" action (action #1) to restart play each
    # time a life is lost; otherwise the agent would sit around doing nothing.
    action = 1 if life_lost else agent.get_action(
        0, game_wrapper.state, evaluation=True)

    # Step action
    _, reward, terminal, life_lost = game_wrapper.step(action,
                                                       render_mode='human')
    evaluate_frame_number += 1
    episode_reward_sum += reward

    # On game-over
    if terminal:
        print(
            f'Game over, reward: {episode_reward_sum}, frame: {frame}/{EVAL_LENGTH}'
        )
        eval_rewards.append(episode_reward_sum)

print('Average reward:',
      sum(eval_rewards) / len(eval_rewards) if eval_rewards else episode_reward_sum)
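
The call agent.get_action(0, game_wrapper.state, evaluation=True) implies the agent switches to a near-greedy policy during evaluation. A minimal sketch of what such an evaluation-mode epsilon-greedy selection commonly looks like (an assumption for illustration; the function name, q_values_fn and eval_epsilon are hypothetical, not this repository's actual implementation):

import numpy as np

def eval_action_sketch(state, q_values_fn, n_actions, eval_epsilon=0.01):
    """Pick an action with a small, fixed exploration rate for evaluation.

    q_values_fn(state) is assumed to return one Q-value per action.
    """
    if np.random.rand() < eval_epsilon:
        return np.random.randint(n_actions)    # rare random exploration
    return int(np.argmax(q_values_fn(state)))  # otherwise act greedily
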
Example #3
            # TRAINING EPOCH
            # We evaluate and save our model after each epoch; epoch length
            # is controlled by a frame count in our config.
            while epoch_frame < FRAMES_BETWEEN_EVAL:
                start_time = time.time()
                game_wrapper.reset()
                life_lost = True
                episode_reward_sum = 0

                # TRAINING EPISODE
                # One episode is one game, after which we update our metrics.
                # If the episode runs longer than anticipated, we cut it short
                # (to avoid spending time on less valuable training).
                for _ in range(MAX_EPISODE_LENGTH):
                    # Get action
                    action = agent.get_action(frame_number, game_wrapper.state)

                    # Take step
                    processed_frame, reward, terminal, life_lost = game_wrapper.step(
                        action)
                    frame_number += 1
                    epoch_frame += 1
                    episode_reward_sum += reward

                    # Add experience to replay memory
                    agent.add_experience(action=action,
                                         frame=processed_frame[:, :, 0],
                                         reward=reward,
                                         clip_reward=CLIP_REWARD,
                                         terminal=life_lost)
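
For context, a minimal sketch of what a replay memory backing agent.add_experience might do, namely optionally clipping the reward and storing the single grayscale frame together with the action and terminal flag (the class below is a simplified assumption for illustration, not the buffer actually used in this codebase):

from collections import deque

import numpy as np

class ReplayMemorySketch:
    """Illustrative fixed-size experience store (hypothetical, simplified)."""

    def __init__(self, size=1_000_000):
        self.buffer = deque(maxlen=size)

    def add_experience(self, action, frame, reward, clip_reward=True, terminal=False):
        if clip_reward:
            reward = float(np.sign(reward))  # clip rewards to {-1.0, 0.0, +1.0}
        self.buffer.append((action, frame, reward, terminal))
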