Example #1
def gdl_changed(self, signal_name, device_udi, *args):
    """
    This method is called when a HAL device is added
    or removed.
    """
    # play sound
    playsound_removed = ''
    playsound_added = ''
    if signal_name == "DeviceAdded":
        # print "\nDeviceAdded, udi=%s" % device_udi
        self.add_device_signal_recv(device_udi)
        self.update_device_dict()
        required, actions, properties = self.properties_rules(device_udi)
        # get the tray icon position as a tuple (x, y)
        coordinates = self.get_tray_coordinates(self.trayicon.tray)
        actor = Actor(actions, required, properties, self.msg_render,
                      self.config, self.voice, coordinates=coordinates)
        actor.on_added()

    elif signal_name == "DeviceRemoved":
        # print "\nDeviceRemoved, udi=%s" % device_udi
        required, actions, properties = self.properties_rules(device_udi)
        # get the tray icon position as a tuple (x, y)
        coordinates = self.get_tray_coordinates(self.trayicon.tray)
        actor = Actor(actions, required, properties, self.msg_render,
                      self.config, self.voice, coordinates=coordinates)
        actor.on_removed()
        self.remove_device_signal_recv(device_udi)
        self.virtual_root.pop(device_udi)
        self.trayicon.on_rem_udi(device_udi)

    elif signal_name == "NewCapability":
        [cap] = args
        # print "\nNewCapability, cap=%s, udi=%s" % (cap, device_udi)

    # not a HAL signal: needed to add new partitions
    elif signal_name == "VolumeAdded":
        print "\nVolumeAdded, udi=%s" % device_udi
        device_dict = args[0]
        print device_dict.__class__
        self.trayicon.on_add_udi(device_udi, device_dict)
        # for i in device_dict[0]["vm.info.childs"]:
        #     for key in i.keys():
        #         print i[key]

    else:
        print "*** Unknown signal %s" % signal_name
Example #2
def property_modified(self, device_udi, num_changes, change_list):
    """
    This method is called when a signal on the Device
    interface is received.
    """
    # device_udi: the device identifier
    # change_list:
    # ex: when the volume has been mounted
    # [('volume.mount_point', False, False), ('volume.is_mounted', False, False)]
    # it will read
    # 1) ('volume.mount_point', False, False)
    #    i[0] == volume.mount_point [property_name] [key]
    #    i[1] == False              [removed]       [rem=0|1]
    #    i[2] == False              [added]         [add=0|1]
    # 2) ('volume.is_mounted', False, False)
    #    i[0] == volume.is_mounted  [property_name]
    #    i[1] == False              [removed]
    #    i[2] == False              [added]
    print "\nPropertyModified, device=%s, num=%s" % (device_udi, num_changes)
    for i in change_list:
        property_name = i[0]
        removed = i[1]
        added = i[2]
        print property_name, removed, added
        # print "  key=%s, rem=%d, add=%d" % (property_name, removed, added)
        device_udi_obj = self.bus.get_object("org.freedesktop.Hal", device_udi)
        properties = self.udi_to_properties(device_udi)
        if property_name == "info.parent":
            self.update_device_list()

        # if from the udi of the device it is possible to find the modified property,
        # value is the value of the HAL key:
        # ex:
        #    key=volume.mount_point
        #    value=/media/usbdisk

        if device_udi_obj.PropertyExists(property_name, dbus_interface="org.freedesktop.Hal.Device"):
            properties[property_name] = device_udi_obj.GetProperty(
                property_name, dbus_interface="org.freedesktop.Hal.Device")
            print "  value=%s" % properties[property_name]
            rules = RulesParser(filename=self.appdir + "/rules.xml", input=properties)

            # ACTOR: when a mount action is requested, volume.mount_point is the
            # property that gets modified, so react to that change
            if rules.actions.get("mount"):
                if property_name == "volume.mount_point":
                    actor = Actor(rules.actions, rules.required, properties,
                                  self.msg_render, self.config, self.voice)
                    # if the value is empty, don't do anything
                    actor.on_modified_mount(properties[property_name])
            else:
                # the modified property does not satisfy the rules: reset them
                if not (property_name in rules.required and
                        str(properties[property_name]) == str(rules.required[property_name])):
                    rules.actions = {}
                    rules.required = {}
        else:
            # the property no longer exists on the device: drop any cached value
            properties.pop(property_name, None)
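
property_modified expects the device UDI as its first argument, but HAL's PropertyModified signal itself only carries the change count and the change list. One way to bridge that, assuming subscriptions are made per device, is to bind the UDI when the receiver is registered; the watch_device helper name below is illustrative, not part of the original project.

def watch_device(self, device_udi):
    """Subscribe to property changes of a single HAL device (illustrative)."""
    self.bus.add_signal_receiver(
        # bind the UDI now so the handler receives it as its first argument
        lambda num_changes, change_list, udi=device_udi:
            self.property_modified(udi, num_changes, change_list),
        signal_name="PropertyModified",
        dbus_interface="org.freedesktop.Hal.Device",
        bus_name="org.freedesktop.Hal",
        path=device_udi)  # HAL device UDIs are also D-Bus object paths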
Example #3
import numpy as np

# Actor, Critic, OUNoise and ReplayBuffer are project-local helpers
# (Keras actor/critic networks, an Ornstein-Uhlenbeck noise process and
# an experience replay buffer) imported from elsewhere in the project.

class DDPG:
    """Reinforcement Learning agent that learns using DDPG."""
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2 * (self.action_range)
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters (CartPole)
        # self.gamma = 0.99  # discount factor
        # self.tau = 0.01  # for soft update of target parameters

        # Algorithm parameters (Quadcopter)
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

    def reset_episode(self):
        self.noise.reset()
        state = self.task.reset()
        self.last_state = state
        return state

    def step(self, action, reward, next_state, done):
        # Save experience / reward
        self.memory.add(self.last_state, action, reward, next_state, done)

        # Learn, if enough samples are available in memory
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)

        # Roll over last state and action
        self.last_state = next_state

    def act(self, state, enable_exploration):
        """Returns actions for given state(s) as per current policy."""
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor_local.model.predict(state)[0]

        noise = np.zeros(self.action_size)
        if enable_exploration:
            noise = self.noise.sample()

        return list(action + noise)

    def learn(self, experiences):
        """Update policy and value parameters using given batch of experience tuples."""
        # Convert experience tuples to separate arrays for each element (states, actions, rewards, etc.)
        states = np.vstack([e.state for e in experiences if e is not None])
        actions = np.array([e.action for e in experiences
                            if e is not None]).astype(np.float32).reshape(
                                -1, self.action_size)
        rewards = np.array([e.reward for e in experiences if e is not None
                            ]).astype(np.float32).reshape(-1, 1)
        dones = np.array([e.done for e in experiences
                          if e is not None]).astype(np.uint8).reshape(-1, 1)
        next_states = np.vstack(
            [e.next_state for e in experiences if e is not None])

        # Get predicted next-state actions and Q values from target models
        #     Q_targets_next = critic_target(next_state, actor_target(next_state))
        actions_next = self.actor_target.model.predict_on_batch(next_states)
        Q_targets_next = self.critic_target.model.predict_on_batch(
            [next_states, actions_next])

        # Compute Q targets for current states and train critic model (local)
        Q_targets = rewards + self.gamma * Q_targets_next * (1 - dones)
        self.critic_local.model.train_on_batch(x=[states, actions],
                                               y=Q_targets)

        # Train actor model (local)
        action_gradients = np.reshape(
            self.critic_local.get_action_gradients([states, actions, 0]),
            (-1, self.action_size))
        # custom training function defined on the Actor wrapper
        self.actor_local.train_fn([states, action_gradients, 1])

        # Soft-update target models
        self.soft_update(self.critic_local.model, self.critic_target.model)
        self.soft_update(self.actor_local.model, self.actor_target.model)

    def soft_update(self, local_model, target_model):
        """Soft update model parameters."""
        local_weights = np.array(local_model.get_weights())
        target_weights = np.array(target_model.get_weights())

        assert len(local_weights) == len(target_weights), \
            "Local and target model parameters must have the same size"

        new_weights = self.tau * local_weights + (1 - self.tau) * target_weights
        target_model.set_weights(new_weights)

    def load_model(self, actor_filename, critic_filename):
        self.actor_local.load_model(actor_filename)
        self.critic_local.load_model(critic_filename)

        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

    def save_model(self, actor_filename, critic_filename):
        self.actor_local.save_model(actor_filename)
        self.critic_local.save_model(critic_filename)
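
To show how the class above is driven, here is a minimal training-loop sketch. It assumes a task object exposing the interface the constructor relies on (state_size, action_size, action_low, action_high, reset()) plus a step(action) method returning (next_state, reward, done); num_episodes and the reward bookkeeping are illustrative, not part of the original code.

# Minimal usage sketch (assumes task.step(action) -> (next_state, reward, done)).
num_episodes = 500  # illustrative value
agent = DDPG(task)

for episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # resets the task and the OU noise process
    total_reward, done = 0.0, False
    while not done:
        action = agent.act(state, enable_exploration=True)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)  # store experience and learn
        state = next_state
        total_reward += reward
    print("Episode %4d  reward: %8.3f" % (episode, total_reward))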