def reset(self):
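        """Start a new episode; if the training trace is exhausted, wrap back
        to the beginning and rebuild the observation arrays."""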
        if self.time >= self.end_data_date and self.train == "train":
            #print("reset time and light file accordingly")
            self.time = self.time_begin
            self.data_pointer = 0
            self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array = Ember_RL_func.build_inputs(self.time, num_hours_input, num_minutes_input, self.start_light_list, self.start_sc_volt_list)
            self.SC_Volt_array = Ember_RL_func.add_random_volt(self.SC_Volt_array)

        self.week_end = Ember_RL_func.calc_week(self.time, num_week_input)
        self.end = self.time + datetime.timedelta(hours=24*episode_lenght)

        return self.hour_array, self.light_array, self.SC_Volt_array
Example #2
def training_PPO(start_train_date, end_train_date, resume, diff_days):
    config = ppo.DEFAULT_CONFIG.copy()
    config["observation_filter"] = 'MeanStdFilter'
    config["batch_mode"] = "complete_episodes"
    config["lr"] = 1e-4
    config["num_workers"] = num_cores
    config["env_config"] = {
        "settings": settings,
        "main_path": curr_path,
        "start_train": start_train_date,
        "end_train": end_train_date,
        "train/test": "train",
        "sc_volt_start_train": sc_volt_train,
        "diff_days": diff_days,
        "GT_hour_start": 0,
    }
    trainer = ppo.PPOTrainer(config=config, env="simplePible")

    if resume != "":
        print("Restoring checkpoint: ", resume)
        sleep(5)
        trainer.restore(resume)  # load the given checkpoint before training

    for i in range(0, int(settings[0]["training_iterations"])):
        result = trainer.train()
        print(pretty_print(result))

        if int(result["training_iteration"]) % 10 == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
            checkp_split = checkpoint.split('/')
            parent_dir = '/'.join(checkp_split[0:-2])

    # Remove previous agents and save the new agent into Agents_Saved
    Ember_RL_func.rm_old_save_new_agent(parent_dir, save_agent_folder)
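
# Minimal invocation sketch (assumes ray.init() and register_env("simplePible", ...)
# have already run and that the module-level globals used above are defined):
#   start = datetime.datetime.strptime("04/15/20 00:00:00", '%m/%d/%y %H:%M:%S')
#   training_PPO(start, start + datetime.timedelta(days=7), resume="", diff_days=7)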
    def __init__(self, config):
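        """Build the environment from the env_config dict (settings, main_path,
        train/test, start/end dates, starting supercapacitor voltage, etc.)."""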

        settings = config["settings"]
        self.config = config
        file_light = settings[0]["file_light"]
        self.light_div = float(settings[0]["light_divider"])
        self.PIR_or_thpl = settings[0]["PIR_or_thpl"]
        self.path_light_data = config["main_path"] + "/" + file_light
        self.train = config["train/test"]
        self.GT_mode = settings[0]["SS_mode"]
        self.diff_days = config["diff_days"]
        self.rem_miss = settings[0]["train_gt/rm_miss/inject"]

        if self.train == "train":
            self.start_data_date = config["start_train"]
            self.start_sc = float(config["sc_volt_start_train"])
            self.end_data_date = config["end_train"]
        elif self.train == "test":
            self.start_data_date = config["start_test"]
            self.end_data_date = config["end_test"]
            if isinstance(config["sc_volt_start_test"], np.ndarray):
                self.start_sc = config["sc_volt_start_test"]
            else:
                self.start_sc = float(config["sc_volt_start_test"])

        self.time = self.start_data_date
        self.time_begin = self.time
        self.end = self.time + datetime.timedelta(hours=24*episode_lenght)
        self.data_pointer = 0
        self.PIR_events_found_dict = []

        #print("Starting looking for data in between: ", self.start_data_date, self.end_data_date)
        self.start_light_list, self.start_sc_volt_list, starter_data = Ember_RL_func.select_input_starter(self.path_light_data, self.start_data_date, num_light_input, num_sc_volt_input)

        if self.train == "test" and self.start_sc != '':
            self.start_sc_volt_list = Ember_RL_func.adjust_sc_voltage(self.start_sc_volt_list, self.start_sc)

        self.file_data = Ember_RL_func.select_input_data(self.path_light_data, self.start_data_date, self.end_data_date)
        if self.train == 'test':
            if self.file_data == []:
                print("\nInput data error or something else. Check!\n")
                exit()

        self.last = starter_data[0]
        splitt = self.last.split('|')

        self.light = int(int(splitt[8])/self.light_div)  # raw light reading ('|'-separated field 8), scaled by light_divider

        self.action_space = spaces.Tuple((
            spaces.Discrete(2),  # sensor on/off
            #spaces.Box(s_t_min_act, s_t_max_act, shape=(1, ), dtype=np.float32) # State Transition
        ))

        self.observation_space = spaces.Tuple((
            spaces.Box(0, 23, shape=(num_hours_input, ), dtype=np.float32),       # hours
            #spaces.Box(0, 59, shape=(num_minutes_input, ), dtype=np.float32),       # minutes
            spaces.Box(0, light_max, shape=(num_light_input, ), dtype=np.float32),       # light
            spaces.Box(SC_volt_min, SC_volt_max, shape=(num_sc_volt_input, ), dtype=np.float32),
            #spaces.Box(0, 1, shape=(num_week_input, ), dtype=np.float32),  #week/weekends
            #spaces.Box(0, 10, shape=(1, ), dtype=np.float32),  #number of events
        ))

        self.Reward = []; self.Mode = []; self.Time = []; self.Light = []
        self.PIR_ON_OFF = []; self.THPL_ON_OFF = []; self.SC_Volt = []; self.State_Trans = []
        self.PIR_event_det = []; self.PIR_event_miss = []; self.thpl_event_det = []; self.thpl_event_miss = []
        self.PIR_gt = []; self.THPL_gt = []
        self.PIR_tot_events = 0; self.PIR_tot_events_detect = 0
        self.thpl_tot_events = 0; self.thpl_tot_events_detect = 0; self.mode = 0

        self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array = Ember_RL_func.build_inputs(self.time, num_hours_input, num_minutes_input, self.start_light_list, self.start_sc_volt_list)

        self.GT_hour_start = int(config["GT_hour_start"])
        self.gt_hours = Ember_RL_func.gt_mode_hours((self.SC_Volt_array[0]/SC_volt_max)*100)

        self.info = {"config": config}
        self.save_data()
    def step(self, action):
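        """Advance the environment by one fixed 60-minute control interval."""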

        action_0 = action[0]

        PIR_gt = np.nan; THPL_gt = np.nan
        if self.GT_mode == '1' and self.train != 'train':
            #print(self.time.hour, self.GT_hour_start, self.gt_hours, str(self.GT_hour_start + self.gt_hours))
            if self.time.hour >= self.GT_hour_start and self.time.hour < (self.GT_hour_start + self.gt_hours):
                action_0 = 1

                if self.PIR_or_thpl == '1':
                    THPL_gt = 1
                elif self.PIR_or_thpl == '0':
                    PIR_gt = 1
                print("GT action taken")

        if self.PIR_or_thpl == '1':
            self.PIR_on_off = 0
            self.thpl_on_off = action_0
        elif self.PIR_or_thpl == '0':
            self.PIR_on_off = action_0
            self.thpl_on_off = 0

        self.next_wake_up_time = 60

        self.time_next = self.time + datetime.timedelta(minutes=self.next_wake_up_time)  # next wake-up in minutes

        self.light, PIR_event_gt, PIR_events_found_dict, thpl_event_gt, self.data_pointer = Pible_param_func.light_event_func_new(self.time, self.time_next, self.mode, self.PIR_on_off,
                                                                                                                                  self.PIR_events_found_dict, self.light, self.light_div,
                                                                                                                                  self.file_data, self.data_pointer)
        if self.PIR_or_thpl == '0':
            thpl_event_gt = 0
        elif self.PIR_or_thpl == '1':
            PIR_event_gt = 0

        PIR_event_det, PIR_event_miss, thpl_event_det, thpl_event_miss = Pible_param_func.event_det_miss(PIR_event_gt, thpl_event_gt, self.PIR_on_off, self.thpl_on_off, self.SC_Volt_array)

        SC_temp, en_prod, en_used = Pible_param_func.Energy(self.SC_Volt_array[0], self.light, self.PIR_or_thpl, self.PIR_on_off, self.thpl_on_off, self.next_wake_up_time, PIR_event_det, thpl_event_det)

        reward = self.get_reward_low_level(en_prod, en_used, PIR_event_det, PIR_event_miss, thpl_event_det, thpl_event_miss, self.PIR_on_off, self.thpl_on_off, self.SC_Volt_array)

        if self.train == 'test':
            self.Reward.append(reward); self.Time.append(self.time); self.Mode.append(self.mode); self.Light.append(self.light)
            self.PIR_ON_OFF.append(self.PIR_on_off); self.THPL_ON_OFF.append(self.thpl_on_off); self.SC_Volt.append(SC_temp)
            self.State_Trans.append(self.next_wake_up_time); self.PIR_event_det.append(PIR_event_det); self.PIR_event_miss.append(PIR_event_miss)
            self.thpl_event_det.append(thpl_event_det); self.thpl_event_miss.append(thpl_event_miss); self.PIR_gt.append(PIR_gt); self.THPL_gt.append(THPL_gt)

        self.PIR_tot_events_detect += PIR_event_det
        self.PIR_tot_events += PIR_event_gt
        self.thpl_tot_events_detect += thpl_event_det
        self.thpl_tot_events += thpl_event_gt

        self.time = self.time_next

        self.week_end = Ember_RL_func.calc_week(self.time, num_week_input)

        self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array = Ember_RL_func.updates_arrays(self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array, self.time, self.light, SC_temp)

        done = self.time >= self.end

        self.info["energy_used"] = en_used
        self.info["energy_prod"] = en_prod
        self.info["PIR_tot_events"] = self.PIR_tot_events
        self.info["PIR_events_detect"] = self.PIR_tot_events_detect
        self.info["thpl_tot_events"] = self.thpl_tot_events
        self.info["thpl_events_detect"] = self.thpl_tot_events_detect

        self.info["SC_volt"] = self.SC_Volt_array
        self.info["state_transition"] = self.next_wake_up_time
        self.info["GT_hours_start"] = 6 if (self.GT_hour_start + self.gt_hours) >= 24 else (self.GT_hour_start + self.gt_hours)

        if self.train != "train" and done:
            self.save_data()

        return (self.hour_array, self.light_array, self.SC_Volt_array), reward, done, self.info
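
# Note: the (hour, light, SC_volt) tuple returned by reset()/step() matches the
# spaces.Tuple observation_space declared in __init__ above.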
def test_and_print_results(agent_folder, iteration, start_date, end_date,
                           title, curr_path, sc_volt_test, train_test_real,
                           diff_days, GT_hour):
    train_test_real_orig = train_test_real
    train_test_real = 'test' if train_test_real == 'train' else train_test_real
    path = glob.glob(agent_folder + '/checkpoint_' + str(iteration) +
                     '/checkpoint-' + str(iteration),
                     recursive=True)

    config = ppo.DEFAULT_CONFIG.copy()
    config["observation_filter"] = 'MeanStdFilter'
    config["batch_mode"] = "complete_episodes"
    config["num_workers"] = 0
    config["explore"] = False
    config["env_config"] = {
        "settings": settings,
        "main_path": curr_path,
        "train/test": train_test_real,
        "start_test": start_date,
        "end_test": end_date,
        "sc_volt_start_test": sc_volt_test,
        "diff_days": diff_days,
        "GT_hour_start": GT_hour,
        "resume_from_iter": iteration,
    }
    if train_test_real == "real":
        #Ember_RL_func.sync_input_data(settings[0]["pwd"], settings[0]["bs_name"], settings[0]["file_light"], "")
        fold = os.path.basename(os.getcwd())
        ID_temp = fold.split('_')[-1]
        action_file = ID_temp + "_action.json"

    agent = ppo.PPOTrainer(config=config, env="simplePible")
    agent.restore(path[0])
    env = SimplePible(config["env_config"])
    obs = env.reset()
    tot_rew = 0
    energy_used_tot = 0
    energy_prod_tot = 0
    print("initial observations: ", obs)
    while True:

        learned_action = agent.compute_action(observation=obs)

        if train_test_real == "real":
            learned_action = Ember_RL_func.correct_action(obs, learned_action)
            Ember_RL_func.sync_action(action_file, learned_action,
                                      settings[0]["PIR_or_thpl"])
            Ember_RL_func.sync_ID_file_to_BS(settings[0]["pwd"],
                                             settings[0]["bs_name"],
                                             action_file,
                                             "/home/pi/Base_Station_20/ID/")
            print("action_taken: ", learned_action)
            if isinstance(learned_action, list):
                print("sleeping " + str(learned_action[0][1]) +
                      " mins decided by RL")
                sleep(int(learned_action[0][1]) * 60)
            else:
                print("sleeping fixed 60 mins")
                sleep(60 * 60)
            Ember_RL_func.sync_input_data(settings[0]["pwd"],
                                          settings[0]["bs_name"],
                                          settings[0]["file_light"], "")

        obs, reward, done, info = env.step(learned_action)
        print(obs)
        print(learned_action, reward, info["thpl_tot_events"])

        energy_used_tot += float(info["energy_used"])
        energy_prod_tot += float(info["energy_prod"])
        tot_rew += reward

        if done:
            obs = env.reset()
            start_date = start_date + datetime.timedelta(days=episode_lenght)
            if start_date >= end_date:
                print("done")
                break

    print("tot reward", round(tot_rew, 3))
    print("Energy Prod per day: ", energy_prod_tot / episode_lenght,
          "Energy Used: ", energy_used_tot / episode_lenght)
    print("Detected events averaged per day: ",
          (int(info["PIR_events_detect"]) + int(info["thpl_events_detect"])) /
          episode_lenght)
    print("Tot events averaged per day: ",
          (int(info["PIR_tot_events"]) + int(info["thpl_tot_events"])) /
          episode_lenght)
    accuracy = Ember_RL_func.calc_accuracy(info)
    print("Accuracy: ", accuracy)

    if train_test_real_orig == "test":  #or train_test_real_orig == "train":
        env.render(tot_rew, title, energy_used_tot, accuracy)

    return path, info["SC_volt"], int(info["GT_hours_start"])
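
# Example call (a sketch; the iteration number is hypothetical and assumes a
# checkpoint saved under save_agent_folder):
#   path, sc_volt, gt_hour = test_and_print_results(save_agent_folder, 150,
#       start_date, end_date, title, curr_path, sc_volt_test, "test",
#       diff_days=7, GT_hour=6)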
def training_PPO(start_train_date, end_train_date, resume, diff_days):
    config = ppo.DEFAULT_CONFIG.copy()
    config["observation_filter"] = 'MeanStdFilter'
    config["batch_mode"] = "complete_episodes"
    config["lr"] = 1e-4
    config["num_workers"] = num_cores
    config["env_config"] = {
        "settings": settings,
        "main_path": curr_path,
        "start_train": start_train_date,
        "end_train": end_train_date,
        "train/test": "train",
        "sc_volt_start_train": sc_volt_train,
        "diff_days": diff_days,
        "GT_hour_start": 0,
    }
    trainer = ppo.PPOTrainer(config=config, env="simplePible")

    if resume != "":
        print("Restoring checkpoint: ", resume)
        sleep(5)
        trainer.restore(resume)  # load the given checkpoint before training

    global prev_res
    prev_res = []

    for i in range(0, int(settings[0]["training_iterations"])):
        result = trainer.train()
        print(pretty_print(result))

        if int(result["training_iteration"]) % 10 == 0:
            #if max_min > int(result["episode_reward_mean"])
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
            checkp_split = checkpoint.split('/')
            parent_dir = '/'.join(checkp_split[0:-2])

            curr_res = float(result["episode_reward_mean"])
            # Declare convergence once the mean episode reward is within +/-5%
            # of the average over recent checkpoints (prev_res may be a list or
            # an ndarray after np.roll, so test its length explicitly)
            if len(prev_res) > 0 and curr_res != 0.0:
                avg_res = sum(prev_res) / len(prev_res)
                print(curr_res, avg_res)
                diff_perc = ((curr_res - avg_res) / curr_res) * 100
                print("\nDiff Percentage: ", diff_perc)
                if -5 < diff_perc < 5:
                    print("Converged!")
                    sleep(2)
                    break

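            # Keep a rolling window of the last four checkpoint rewards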
            if len(prev_res) >= 4:
                prev_res = np.roll(prev_res, 1)
                prev_res[0] = curr_res
            else:
                prev_res.append(curr_res)

            #print(prev_res)
            #sleep(4)

    # Remove previous agents and save the new agent into Agents_Saved
    #print("out", parent_dir, save_agent_folder)
    Ember_RL_func.rm_old_save_new_agent(parent_dir, save_agent_folder)

if __name__ == "__main__":

    print("Updating code...")
    Ember_RL_func.update_code()
    print("Starting RL Agent")

    register_env("simplePible", lambda config: SimplePible(config))

    #print("curr path: " , sys.argv[1])
    #curr_path = sys.argv[1]
    curr_path = os.getcwd()

    # Use the following settings
    with open('settings.json', 'r') as f:
        settings = json.load(f)

    title = settings[0]["title"]
    train_test_real = settings[0]["train/test/real"]
    fold = settings[0]["agent_saved_folder"]
Example #8
def test_and_print_results(agent_folder, iteration, start_date, end_date,
                           title, curr_path, sc_volt_test, train_test_real,
                           diff_days, GT_hour):
    train_test_real_orig = train_test_real
    train_test_real = 'test' if train_test_real == 'train' else train_test_real
    path = glob.glob(agent_folder + '/checkpoint_' + str(iteration) +
                     '/checkpoint-' + str(iteration),
                     recursive=True)

    config = ppo.DEFAULT_CONFIG.copy()
    config["observation_filter"] = 'MeanStdFilter'
    config["batch_mode"] = "complete_episodes"
    config["num_workers"] = 0
    config["explore"] = False
    config["env_config"] = {
        "settings": settings,
        "main_path": curr_path,
        "train/test": train_test_real,
        "start_test": start_date,
        "end_test": end_date,
        "sc_volt_start_test": sc_volt_test,
        "diff_days": diff_days,
        "GT_hour_start": GT_hour,
        "resume_from_iter": iteration,
    }

    agent = ppo.PPOTrainer(config=config, env="simplePible")
    agent.restore(path[0])
    env = SimplePible(config["env_config"])
    obs = env.reset()
    tot_rew = 0
    energy_used_tot = 0
    energy_prod_tot = 0

    print("initial observations: ", obs)
    while True:
        learned_action = agent.compute_action(observation=obs)
        obs, reward, done, info = env.step(learned_action)

        energy_used_tot += float(info["energy_used"])
        energy_prod_tot += float(info["energy_prod"])
        tot_rew += reward

        if done:
            obs = env.reset()
            start_date = start_date + datetime.timedelta(days=episode_lenght)
            if start_date >= end_date:
                print("done")
                break

    print("tot reward", round(tot_rew, 3))
    print("Energy Prod per day: ", energy_prod_tot / episode_lenght,
          "Energy Used: ", energy_used_tot / episode_lenght)
    print("Detected events averaged per day: ",
          (int(info["PIR_events_detect"]) + int(info["thpl_events_detect"])) /
          episode_lenght)
    print("Tot events averaged per day: ",
          (int(info["PIR_tot_events"]) + int(info["thpl_tot_events"])) /
          episode_lenght)
    accuracy = Ember_RL_func.calc_accuracy(info)
    print("Accuracy: ", accuracy)

    env.render(tot_rew, title, energy_used_tot, accuracy)

    return path, info["SC_volt"], int(info["GT_hours_start"])
Example #9
    print("Starting RL Agent")

    register_env("simplePible", lambda config: SimplePible(config))
    curr_path = os.getcwd()

    # Use the following settings
    with open('settings.json', 'r') as f:
        settings = json.load(f)

    title = settings[0]["title"]
    train_test_real = settings[0]["train/test/real"]
    fold = settings[0]["agent_saved_folder"]
    num_cores = settings[0]["num_cores"]

    if num_cores == "max":
        num_cores = Ember_RL_func.cores_available()
    else:
        num_cores = int(num_cores)

    sc_volt_train = float(settings[0]["sc_volt_start_train"])
    sc_volt_test = float(settings[0]["sc_volt_start_test"])
    save_agent_folder = curr_path + "/" + fold
    GT_hour_start = 6  # GT stands for Ground Truth; it is enabled once SS_mode is activated
    resume_path = ''

    ray.init()

    start_train_date = datetime.datetime.strptime(settings[0]["start_train"],
                                                  '%m/%d/%y %H:%M:%S')
    end_train_date = datetime.datetime.strptime(settings[0]["end_train"],
                                                '%m/%d/%y %H:%M:%S')
Example #10
import os
import pickle

import Pible_param_func
import Ember_RL_func

files = os.listdir("Save_Data")
print(files)
tot_acc = 0; energy_used_tot = 0; tot_thpl_on_min = 0
acc = []; sc_volt = []; inj_prob_hours = []
num_files = 0
for file in files:
    with open("Save_Data/" + file, "rb") as f:
        file_spl = file.split('_')
        if 'test' in file_spl:
            print(file)
            data = pickle.load(f)
            num_files += 1
            accuracy = Ember_RL_func.calc_accuracy(data["info"])
            acc.append(accuracy)
            tot_acc += accuracy
            sc_volt.append(data["info"]["SC_volt"])
            #inj_prob_hours.append(data["info"]["hours_inj_prob"])
            energy_used_tot += float(data["info"]["energy_used"])
            THPL = data["THPL_ON_OFF"]; State_Trans = data["State_Trans"]
            for i, el in enumerate(THPL):
                if el == 1:
                    tot_thpl_on_min += int(State_Trans[i])
            #Pible_param_func.plot_hist_low_level(data, 0, "title_final", float(data["info"]["energy_used"]), accuracy)

print("Accuracy: ", acc)
print("SC Volt: ", sc_volt)
#print("Inj Prob Hours: ", inj_prob_hours)
print("accuracy: ", round(tot_acc/num_files, 1))
    def __init__(self, config):
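        """Environment setup covering train, test, and real (on-device) modes."""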

        settings = config["settings"]
        self.config = config
        file_light = settings[0]["file_light"]
        self.light_div = float(settings[0]["light_divider"])
        self.PIR_or_thpl = settings[0]["PIR_or_thpl"]
        self.path_light_data = config["main_path"] + "/" + file_light
        self.train = config["train/test"]
        self.GT_mode = settings[0]["GT_mode"]
        self.diff_days = config["diff_days"]
        self.rem_miss = settings[0]["train_gt/rm_miss/inject"]
        #print(config["main_path"], self.path_light_data)

        if self.train == "train":
            #start_data_date = datetime.datetime.strptime(config["start_train"], '%m/%d/%y %H:%M:%S')
            self.start_data_date = config["start_train"]
            #self.end_data_date = datetime.datetime.strptime(config["end_train"], '%m/%d/%y %H:%M:%S')
            self.start_sc = float(config["sc_volt_start_train"])
            self.end_data_date = config["end_train"]
        elif self.train == "test":
            self.start_data_date = config["start_test"]
            self.end_data_date = config["end_test"]
            self.start_sc = float(config["sc_volt_start_test"])
        elif self.train == "real":
            Ember_RL_func.sync_input_data(settings[0]["pwd"], settings[0]["bs_name"], file_light, "")
            #sleep(10)
            #last_row = Ember_RL_func.last_valid_row(file_light)
            self.start_data_date = datetime.datetime.now()
            self.end_data_date = datetime.datetime.now() + datetime.timedelta(days=1)

        #self.time = datetime.datetime.strptime("04/15/20 00:00:00", '%m/%d/%y %H:%M:%S')
        self.time = self.start_data_date
        self.time_begin = self.time
        self.end = self.time + datetime.timedelta(hours=24*episode_lenght)
        self.data_pointer = 0
        self.PIR_events_found_dict = []

        #print("Starting looking for data in between: ", self.start_data_date, self.end_data_date)
        self.start_light_list, self.start_sc_volt_list, starter_data = Ember_RL_func.select_input_starter(self.path_light_data, self.start_data_date, num_light_input, num_sc_volt_input)

        if self.train == "test" and self.start_sc != '':
            self.start_sc_volt_list = Ember_RL_func.adjust_sc_voltage(self.start_sc_volt_list, self.start_sc)

        self.file_data = Ember_RL_func.select_input_data(self.path_light_data, self.start_data_date, self.end_data_date)
        if self.train == 'test':
            if self.file_data == []:
                print("\nInput data error or something else. Check!\n")
                exit()
        #for value in self.file_data:
        #    print(self.data_pointer, value)
        #    sleep(5)
        #self.data_pointer_orig = self.data_pointer
        #print(starter_data)
        #print(starter_data)
        self.last = starter_data[0]
        splitt = self.last.split('|')
        #self.start_sc = (float(splitt[5]) * SC_volt_max)/100
        #print("last line found: ", self.last)

        self.light = int(int(splitt[8])/self.light_div)  # raw light reading ('|'-separated field 8), scaled by light_divider
        #self.temp = float(line[2])
        #self.hum = float(line[3])
        #self.press = int(line[11])

        if self.PIR_or_thpl == '2':
            self.action_space = spaces.Tuple((
                spaces.Discrete(2), # PIR_on_off = 0 means PIR off; = 1 means PIR on
                spaces.Discrete(2), # THPL sensor on/off, same convention as the PIR
                spaces.Box(s_t_min_act, s_t_max_act, shape=(1, ), dtype=np.float32) # State Transition
            ))
        else:
            self.action_space = spaces.Tuple((
                spaces.Discrete(2),
                spaces.Box(s_t_min_act, s_t_max_act, shape=(1, ), dtype=np.float32) # State Transition
            ))
        #self.action_space = spaces.Discrete(2)
        if self.diff_days >= 7:
            self.observation_space = spaces.Tuple((
                spaces.Box(0, 23, shape=(num_hours_input, ), dtype=np.float32),       # hours
                #spaces.Box(0, 59, shape=(num_minutes_input, ), dtype=np.float32),       # minutes
                spaces.Box(0, light_max, shape=(num_light_input, ), dtype=np.float32),       # light
                spaces.Box(SC_volt_min, SC_volt_max, shape=(num_sc_volt_input, ), dtype=np.float32),
                spaces.Box(0, 1, shape=(num_week_input, ), dtype=np.float32),  #week/weekends
                #spaces.Box(0, 10, shape=(1, ), dtype=np.float32),  #number of events
            ))
        else:
            self.observation_space = spaces.Tuple((
                spaces.Box(0, 23, shape=(num_hours_input, ), dtype=np.float32),       # hours
                #spaces.Box(0, 59, shape=(num_minutes_input, ), dtype=np.float32),       # minutes
                spaces.Box(0, light_max, shape=(num_light_input, ), dtype=np.float32),       # light
                spaces.Box(SC_volt_min, SC_volt_max, shape=(num_sc_volt_input, ), dtype=np.float32),
                #spaces.Box(0, 1, shape=(num_week_input, ), dtype=np.float32),  #week/weekends
                #spaces.Box(0, 10, shape=(1, ), dtype=np.float32),  #number of events
            ))

        self.Reward = []; self.Mode = []; self.Time = []; self.Light = []
        self.PIR_ON_OFF = []; self.THPL_ON_OFF = []; self.SC_Volt = []; self.State_Trans = []
        self.PIR_event_det = []; self.PIR_event_miss = []; self.thpl_event_det = []; self.thpl_event_miss = []
        self.PIR_gt = []; self.THPL_gt = []; self.Len_Dict_Events = []
        self.PIR_tot_events = 0; self.PIR_tot_events_detect = 0
        self.thpl_tot_events = 0; self.thpl_tot_events_detect = 0; self.mode = 0

        self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array = Ember_RL_func.build_inputs(self.time, num_hours_input, num_minutes_input, self.start_light_list, self.start_sc_volt_list)

        #if self.GT_mode == '1' and self.train != 'train':
        self.GT_hour_start = int(config["GT_hour_start"])
        self.gt_hours = Ember_RL_func.gt_mode_hours((self.SC_Volt_array[0]/SC_volt_max)*100)

        # injecting problems
        self.hours_inj_prob = random.randint(0, 23)

        self.info = {"config": config}
        self.save_data()
    def step(self, action):
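        """Advance the environment; the wake-up interval is decoded from the
        continuous component of the action."""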
        #print("action ", action)
        #action = [1, 0.5]
        #if isinstance(action, list):
        if len(action) == 1:
            action_0 = action[0]
        elif len(action) == 2:
            action_0 = action[0]
            action_1 = action[1]
        elif len(action) == 3:
            action_0 = action[0]
            action_1 = action[1]
            action_2 = action[2]

        if self.rem_miss == '2' and self.train == 'test': # injecting problems
            if self.time.hour >= self.hours_inj_prob and self.time.hour <= (self.hours_inj_prob + 1):
                action_0 = 0
                if len(action) == 3:
                    action_1 = 0
                #print("injecting problems from to: ", self.hours_inj_prob, self.hours_inj_prob + 1)
                #sleep(10)

        PIR_gt = np.nan; THPL_gt = np.nan
        if self.GT_mode == '1' and self.train != 'train':
            #self.gt_hours = Ember_RL_func.gt_mode_hours((self.SC_Volt_array[0]/SC_volt_max)*100)
            print(self.time.hour, self.GT_hour_start, self.gt_hours, str(self.GT_hour_start + self.gt_hours))
            if self.time.hour >= self.GT_hour_start and self.time.hour < (self.GT_hour_start + self.gt_hours):
                #if len(action) > 0:
                action_0 = 1
                if len(action) == 3:
                    action_1 = 1

                if self.PIR_or_thpl == '2':
                    PIR_gt = 1; THPL_gt = 1
                elif self.PIR_or_thpl == '1':
                    THPL_gt = 1
                elif self.PIR_or_thpl == '0':
                    PIR_gt = 1
                print("GT action taken")

        if self.PIR_or_thpl == '2':
            self.PIR_on_off = action_0
            self.thpl_on_off = action_1
        elif self.PIR_or_thpl == '1':
            self.PIR_on_off = 0
            self.thpl_on_off = action_0
        elif self.PIR_or_thpl == '0':
            self.PIR_on_off = action_0
            self.thpl_on_off = 0

        # Linearly map the continuous action from [s_t_min_act, s_t_max_act]
        # onto a wake-up interval in [s_t_min_new, s_t_max_new] minutes
        if len(action) == 2:
            self.next_wake_up_time = int((s_t_max_new - s_t_min_new) / (s_t_max_act - s_t_min_act) * (action_1 - s_t_max_act) + s_t_max_new)
        elif len(action) == 3:
            self.next_wake_up_time = int((s_t_max_new - s_t_min_new) / (s_t_max_act - s_t_min_act) * (action_2 - s_t_max_act) + s_t_max_new)
        #self.next_wake_up_time = 60

        self.time_next = self.time + datetime.timedelta(minutes=self.next_wake_up_time)  # next wake-up in minutes

        if self.train == 'real': # now it changes because data are collected with time
            print("looking for data in between: ", self.time, self.time_next)
            #self.file_data = Ember_RL_func.select_input_data(self.path_light_data, self.start_data_date, self.end_data_date)
            self.file_data = Ember_RL_func.select_input_data(self.path_light_data, self.time, self.time_next)
            print("self.file_data: ")
            for data in self.file_data:
                print(data)

        #if len(self.file_data) > 0 and self.file_data[0] != self.last:
        #    self.file_data = [self.last] + self.file_data
        #    self.last = self.file_data[-1]

        #self.start_light_list, self.start_sc_volt_list, starter_data = Ember_RL_func.select_input_starter(self.path_light_data, start_data_date, num_light_input, num_sc_volt_input)
        #print(start_light_list, start_sc_volt_list)
        #self.data_pointer = 0
        #for line in self.file_data:
        #    print("file_data line: ", line)
        #splitt = self.last.split('|')
        #self.start_sc = (float(splitt[5]) * SC_volt_max)/100
        #print("last line found: ", self.last)

        #self.light, PIR_event_gt, PIR_events_found_dict, thpl_event_gt, self.data_pointer = light_event_func(self.time, self.time_next, self.mode, self.PIR_on_off, self.PIR_events_found_dict, self.light, self.light_div, self.file_data, self.data_pointer-1)
        self.light, PIR_event_gt, PIR_events_found_dict, thpl_event_gt, self.data_pointer = Pible_param_func.light_event_func_new(self.time, self.time_next, self.mode, self.PIR_on_off,
                                                                                                                                  self.PIR_events_found_dict, self.light, self.light_div,
                                                                                                                                  self.file_data, self.data_pointer)
        if self.PIR_or_thpl == '0':
            thpl_event_gt = 0
        elif self.PIR_or_thpl == '1':
            PIR_event_gt = 0

        PIR_event_det, PIR_event_miss, thpl_event_det, thpl_event_miss = Pible_param_func.event_det_miss(PIR_event_gt, thpl_event_gt, self.PIR_on_off, self.thpl_on_off, self.SC_Volt_array)

        if self.train == "test" and (PIR_event_miss > 0 or thpl_event_miss > 0) and (self.rem_miss == '1' or self.rem_miss == '2'):
            Ember_RL_func.remove_missed_data(self.time, self.time_next, self.path_light_data)

        SC_temp, en_prod, en_used = Pible_param_func.Energy(self.SC_Volt_array[0], self.light, self.PIR_or_thpl, self.PIR_on_off, self.thpl_on_off, self.next_wake_up_time, PIR_event_det, thpl_event_det)

        if self.train == 'real':
            if len(self.file_data) > 0:
                self.last = self.file_data[-1]
            splitt = self.last.split('|')
            SC_temp = (float(splitt[5]) * SC_volt_max)/100
            #print(SC_temp, self.last)
            print("new events PIR and THPL found: ", PIR_event_det, thpl_event_det)
            print("new events PIR and THPL miss: ", PIR_event_miss, thpl_event_miss)


        reward = self.get_reward_low_level(en_prod, en_used, PIR_event_det, PIR_event_miss, thpl_event_det, thpl_event_miss, self.PIR_on_off, self.thpl_on_off, self.SC_Volt_array)

        len_dict_event = np.array([len(self.PIR_events_found_dict)])
        if self.train == 'test' or self.train == 'real':
            self.Reward.append(reward); self.Time.append(self.time); self.Mode.append(self.mode); self.Light.append(self.light)
            self.PIR_ON_OFF.append(self.PIR_on_off); self.THPL_ON_OFF.append(self.thpl_on_off); self.SC_Volt.append(SC_temp)
            self.State_Trans.append(self.next_wake_up_time); self.PIR_event_det.append(PIR_event_det); self.PIR_event_miss.append(PIR_event_miss); self.Len_Dict_Events.append(len_dict_event)
            self.thpl_event_det.append(thpl_event_det); self.thpl_event_miss.append(thpl_event_miss); self.PIR_gt.append(PIR_gt); self.THPL_gt.append(THPL_gt)

        self.PIR_tot_events_detect += PIR_event_det
        self.PIR_tot_events += PIR_event_gt
        self.thpl_tot_events_detect += thpl_event_det
        self.thpl_tot_events += thpl_event_gt

        self.time = self.time_next

        self.week_end = Ember_RL_func.calc_week(self.time, num_week_input)

        self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array = Ember_RL_func.updates_arrays(self.hour_array, self.minute_array, self.light_array, self.SC_Volt_array, self.time, self.light, SC_temp)

        #print("after", self.hour_array, self.minute_array, self.SC_Volt_array)
        done = self.time >= self.end
        #done = True

        self.info["energy_used"] = en_used
        self.info["energy_prod"] = en_prod
        self.info["PIR_tot_events"] = self.PIR_tot_events
        self.info["PIR_events_detect"] = self.PIR_tot_events_detect
        self.info["thpl_tot_events"] = self.thpl_tot_events
        self.info["thpl_events_detect"] = self.thpl_tot_events_detect
        #info["death_days"] = self.death_days
        #info["death_min"] = self.death_min
        self.info["SC_volt"] = SC_temp
        self.info["state_transition"] = self.next_wake_up_time
        #self.info["GT_hours_start"] = 0 if (self.GT_hour_start + self.gt_hours) >= 24 else (self.GT_hour_start + self.gt_hours)
        self.info["GT_hours_start"] = 6 if (self.GT_hour_start + self.gt_hours) >= 21 else (self.GT_hour_start + self.gt_hours)
        self.info["hours_inj_prob"] = 0 if self.rem_miss != '2' else self.hours_inj_prob
        #print(self.PIR_tot_events, self.thpl_tot_events, self.PIR_tot_events_detect, self.thpl_tot_events_detect)
        #print(self.hour_array, self.light_array, self.SC_Volt_array, self.week_end, reward, done, info)
        #print(self.data_pointer)
        if self.train != "train" and done:
            self.save_data()

            #print(self.hour_array, self.light_array, self.SC_Volt_array, self.week_end, reward, done, info)
        #return (self.hour_array, self.minute_array, self.SC_Volt_array, self.week_end), reward, done, info
        #print("step ", self.hour_array, self.light_array, self.SC_Volt_array, self.week_end)
        #sleep(5)
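        # With >= 7 days of data the weekday/weekend flag is part of the
        # observation, matching the observation_space chosen in __init__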
        if self.diff_days >= 7:
            return (self.hour_array, self.light_array, self.SC_Volt_array, self.week_end), reward, done, self.info
        else:
            return (self.hour_array, self.light_array, self.SC_Volt_array), reward, done, self.info