    def reset(self):
        self.step_made = 0
        self.state = list()
        for ctrl_id, attr in self.o_dict.items():
            self.state.append(self.netobj.get_output()[ctrl_id][attr])
        self.netobj = ne.Netz(self.env.get_net(), self.env.get_logger(),
                              agent=self.id, time=0)
        return self.state
    def __init__(self, net, log, net_id='example_simple',
                 net_fp='./loc_data/pp_net.json',
                 grid_fp='./loc_data/gridIO.json'):
        self.log = log
        self.t_handle = InSimTime()
        self.net = net
        self.netobj = ne.Netz(self.net, self.log,
                              time=self.t_handle.get_state()['time'])
        self.net_id = net_id
        self.net_fp = net_fp
        self.grid_fp = grid_fp
    def __init__(self, env, agent_id,
                 action_dict={'trafo0': 'tp_pos'},
                 observation_dict={
                     'line0': 'loading_percent',
                     'line1': 'loading_percent',
                     'line2': 'loading_percent',
                     'line3': 'loading_percent'
                 },
                 reward_id=('SimpleReward', {}),
                 max_step=100):
        self.env = env
        self.id = agent_id
        self.t_handle = InSimTime()
        n = self.env.get_net()
        time = self.t_handle.get_state()['time']
        self.log = self.env.get_logger()
        self.netobj = ne.Netz(n, self.log, agent=self.id, time=time)
        self.max_step = max_step
        self.step_made = 0
        self.state = list()
        self.last_state = list()
        self.dtype = dict()
        self.a_dict = action_dict
        self.o_dict = observation_dict
        self.list_input_keys = list()
        for keys in self.a_dict.keys():
            self.list_input_keys.append(keys)
        self.init_action_space(self.a_dict)
        self.init_observation_space(self.o_dict)
        self.last_reward = 0
        self.act = self.__get_default_pos()
        self.shpe = list()
        self.nb_actions = 0
        for name in self.list_input_keys:
            self.nb_actions += int(self.act_shape[name])
            self.shpe.append(self.act_shape[name])
        print('nb actions: {}'.format(self.nb_actions))
        self.reward_id, self.reward_keys = reward_id
        self.results = dict()
    def update_net(self, net):
        self.net = net
        self.netobj = ne.Netz(self.net, self.log,
                              time=self.t_handle.get_state()['time'])
    def step(self, action):
        # Reset the net to the current simulation time before applying actions.
        current_time = self.t_handle.convert_step_to_time(self.step_made)
        net = self.env.get_net()
        self.netobj = ne.Netz(net, self.log, agent=self.id, time=current_time)
        state = list()
        state_result = dict()
        action_result = dict()
        for index in range(len(action)):
            act = action[index]
            ctrl = self.list_input_keys[index]
            attr = self.a_dict[ctrl]
            print((ctrl, attr))
            self.compute_action(ctrl, act, self.act_shape, dtype=self.dtype[ctrl])
            self.act[ctrl] = np.clip(np.array([self.act[ctrl]]),
                                     self.act_min[ctrl], self.act_max[ctrl])[0]
            self.netobj.set_input(ctrl, attr, self.act[ctrl])
            action_result[ctrl + attr] = self.act[ctrl]
            print('Input parameter changed to: {}'.format(self.act[ctrl]))
        # Can be used to dynamically change the production of wind turbines.
        # Script here
        self.netobj.run_powerflow()
        for ctrl_id, attr in self.o_dict.items():
            state.append(self.netobj.get_output()[ctrl_id][attr])
            print('{}: {} --- {}'.format(
                ctrl_id, attr, self.netobj.get_output()[ctrl_id][attr]))
            state_result[ctrl_id] = self.netobj.get_output()[ctrl_id][attr]
        # State slice can be used to control the number of input factors.
        reward_obj = rw.__dict__[self.reward_id](
            state, self.last_state, self.last_reward, self.reward_keys
        )
        reward = reward_obj.compute_reward()
        self.step_made += 1
        done = False
        if self.step_made == self.max_step:  # or reward < 0:
            done = True
        print('Steps in Episode: ({}/{})'.format(self.step_made, self.max_step))
        self.results[self.step_made] = {
            'state': state_result,
            'action': action_result
        }
        self.last_reward = reward
        self.last_state = state
        self.t_handle.incr_step()
        self.env.get_logger().info('executed step; see exp data')
        self.env.update_net(self.netobj.net)
        return np.array(state), reward, done, {}
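# --- Usage sketch (not part of the original code) ---
# A minimal episode loop, assuming `agent` is an instance of the agent class
# whose reset() and step() are shown above, and that its environment was
# already constructed with a pandapower net and a logger. The random action
# sampling is only a placeholder; an actual controller or RL policy would
# choose the actions.
import numpy as np

def run_episode(agent):
    state = agent.reset()
    done = False
    total_reward = 0.0
    while not done:
        # one value per controllable input (placeholder policy)
        action = np.random.rand(len(agent.list_input_keys))
        state, reward, done, _ = agent.step(action)
        total_reward += reward
    return total_reward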