Example #1
	def reset(self):
		# Start a new episode: reset the step counter and read the current
		# observations before re-creating the network wrapper at time 0.
		self.step_made = 0
		self.state = list()
		for ctrl_id, attr in self.o_dict.items():
			self.state.append(self.netobj.get_output()[ctrl_id][attr])
		self.netobj = ne.Netz(self.env.get_net(), self.env.get_logger(), agent=self.id, time=0)
		return self.state
Example #2
	def __init__(self, net, log, net_id='example_simple', net_fp='./loc_data/pp_net.json', grid_fp='./loc_data/gridIO.json'):
		# Store the network and logger, start the simulation clock and wrap
		# the network in a Netz object at the current simulation time.
		self.log = log
		self.t_handle = InSimTime()
		self.net = net
		self.netobj = ne.Netz(self.net, self.log, time=self.t_handle.get_state()['time'])
		self.net_id = net_id
		self.net_fp = net_fp
		self.grid_fp = grid_fp
Example #3
    def __init__(self,
                 env,
                 agent_id,
                 action_dict={'trafo0': 'tp_pos'},
                 observation_dict={
                     'line0': 'loading_percent',
                     'line1': 'loading_percent',
                     'line2': 'loading_percent',
                     'line3': 'loading_percent'
                 },
                 reward_id=('SimpleReward', {}),
                 max_step=100):

        self.env = env
        self.id = agent_id
        self.t_handle = InSimTime()

        # Wrap the environment's network in a Netz object at the current
        # simulation time so the agent can set inputs and read outputs.
        n = self.env.get_net()
        time = self.t_handle.get_state()['time']
        self.log = self.env.get_logger()
        self.netobj = ne.Netz(n, self.log, agent=self.id, time=time)

        self.max_step = max_step
        self.step_made = 0

        self.state = list()
        self.last_state = list()

        self.dtype = dict()

        self.a_dict = action_dict
        self.o_dict = observation_dict

        # Fix the order in which controllable elements receive action values.
        self.list_input_keys = list(self.a_dict.keys())

        self.init_action_space(self.a_dict)
        self.init_observation_space(self.o_dict)
        self.last_reward = 0

        # Start from the default positions of all controllable elements.
        self.act = self.__get_default_pos()

        # Total number of action values, summed over all controllable elements.
        self.shpe = list()
        self.nb_actions = 0
        for name in self.list_input_keys:
            self.nb_actions += int(self.act_shape[name])
            self.shpe.append(self.act_shape[name])
        print('nb actions: {}'.format(self.nb_actions))

        # reward_id unpacks into the name of a reward class in the rw module
        # and the extra parameters passed to it.
        self.reward_id, self.reward_keys = reward_id

        self.results = dict()
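
A minimal usage sketch for this constructor, assuming the class is named Agent and that env is an environment object exposing get_net(), get_logger() and update_net() as used in these examples; Agent and env are placeholder names, not taken from the original source.

# Hypothetical instantiation; Agent and env are placeholder names.
agent = Agent(env,
              agent_id=0,
              action_dict={'trafo0': 'tp_pos'},
              observation_dict={'line0': 'loading_percent',
                                'line1': 'loading_percent'},
              reward_id=('SimpleReward', {}),
              max_step=50)
print(agent.nb_actions)  # summed action values over all controllable elements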
Example #4
    def update_net(self, net):
        # Replace the stored network and rebuild the Netz wrapper at the
        # current simulation time.
        self.net = net
        self.netobj = ne.Netz(self.net,
                              self.log,
                              time=self.t_handle.get_state()['time'])
Example #5
    def step(self, action):
        # Rebuild the network wrapper at the simulation time of the current step.
        current_time = self.t_handle.convert_step_to_time(self.step_made)
        net = self.env.get_net()
        self.netobj = ne.Netz(net, self.log, agent=self.id, time=current_time)

        state = list()
        state_result = dict()
        action_result = dict()

        # Apply each action value to its controllable element.
        for index, act in enumerate(action):
            ctrl = self.list_input_keys[index]
            attr = self.a_dict[ctrl]
            print((ctrl, attr))
            self.compute_action(ctrl,
                                act,
                                self.act_shape,
                                dtype=self.dtype[ctrl])

            # Clip the new setting to the allowed range of this element.
            self.act[ctrl] = (np.clip(np.array([self.act[ctrl]]),
                                      self.act_min[ctrl],
                                      self.act_max[ctrl]))[0]
            self.netobj.set_input(ctrl, attr, self.act[ctrl])
            action_result[ctrl + attr] = self.act[ctrl]
            print('Input-Parameter changed to: {}'.format(self.act[ctrl]))

        # Can be used to dynamically change the production of wind turbines.
        # Script goes here.
        self.netobj.run_powerflow()

        # Collect the new observations from the power flow results.
        for ctrl_id, attr in self.o_dict.items():
            value = self.netobj.get_output()[ctrl_id][attr]
            state.append(value)
            print('{}: {} --- {}'.format(ctrl_id, attr, value))
            state_result[ctrl_id] = value

        # Look up the reward class by name in the rw module; the state can be
        # sliced to control the number of input factors.
        reward_obj = rw.__dict__[self.reward_id](
            state, self.last_state, self.last_reward, self.reward_keys)
        reward = reward_obj.compute_reward()

        self.step_made += 1
        done = False
        if self.step_made == self.max_step:  # or reward < 0:
            done = True
        print('Steps in Episode: ({}/{})'.format(self.step_made,
                                                 self.max_step))

        self.results[self.step_made] = {
            'state': state_result,
            'action': action_result
        }
        self.last_reward = reward
        self.last_state = state

        self.t_handle.incr_step()

        self.env.get_logger().info('executed step; see exp data')
        self.env.update_net(self.netobj.net)
        return np.array(state), reward, done, {}
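
The reset() and step() methods above follow the usual gym-style contract: reset() returns the initial observation and step(action) returns (state, reward, done, info). A minimal episode loop built on that contract could look like the sketch below; Agent, env and the random action sampling are assumptions for illustration, not part of the original source.

import numpy as np

# Hypothetical driver loop; env and Agent stand in for the environment and
# the agent class shown in the examples above.
agent = Agent(env, agent_id=0)

state = agent.reset()
done = False
while not done:
    # One value per controllable element; agent.shpe holds the action shape
    # per element as collected in __init__ (random sampling is an assumption).
    action = [np.random.randint(int(n)) for n in agent.shpe]
    state, reward, done, info = agent.step(action)
    print('reward: {}'.format(reward))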