Example 1 — lifelong novelty module (Random Network Distillation)
from typing import List

import tensorflow as tf

# RND, MovingAverage and Transition are assumed to be defined elsewhere.


class LifeLong:
    def __init__(self):
        self.random_network = RND()
        self.predict_network = RND()
        self.moving_average = MovingAverage()
        self.optimizer = tf.keras.optimizers.Adam()

        # The random (target) network stays frozen; only the predictor is trained.
        self.random_network.trainable = False

    def reset(self):
        self.moving_average.reset()

    def get_modulator(self, observations):
        random = self.random_network(observations)
        predict = self.predict_network(observations)

        # Squared prediction error per observation, reduced over the feature
        # axis (the squared norm used by RND).
        error = tf.reduce_sum(tf.math.square(predict - random), axis=-1)
        mean, stddev = self.moving_average(error)

        # Lifelong modulator: 1 plus the normalized prediction error.
        modulator = 1 + (error - mean) / stddev
        return modulator

    def train(self, batch_transitions: List[Transition]):
        observations = []
        next_observations = []

        for transition in batch_transitions:
            # The last 6 observations give 5 (observation, next_observation)
            # pairs. (next_observations is not used by the RND loss below.)
            _observations = transition.observations[-6:]

            for _observation, _next_observation in zip(_observations[:-1], _observations[1:]):
                observations.append(_observation)
                next_observations.append(_next_observation)

        observations = tf.convert_to_tensor(observations)
        next_observations = tf.convert_to_tensor(next_observations)
        with tf.GradientTape() as tape:
            # The frozen random network provides the regression target.
            with tape.stop_recording():
                true = self.random_network(observations)
            predict = self.predict_network(observations)
            loss = tf.reduce_mean(tf.keras.losses.mean_squared_error(true, predict))
        grads = tape.gradient(loss, self.predict_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.predict_network.trainable_variables))
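
For reference, a minimal usage sketch of the lifelong module. The observations batch and the episodic reward are hypothetical placeholders, and the clipping of the modulator to [1, 5] follows the NGU-style combination of episodic and lifelong intrinsic rewards:

lifelong = LifeLong()

modulator = lifelong.get_modulator(observations)  # batch of observations
# Scale the episodic intrinsic reward by the modulator, clipped to [1, L]
# (L = 5 in the NGU paper).
intrinsic_reward = episodic_reward * tf.clip_by_value(modulator, 1.0, 5.0)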
Example 2 — episodic novelty module (k-nearest-neighbour pseudo-counts in an embedding space)
from collections import deque
from typing import List, Tuple

import tensorflow as tf

# Embedding, MovingAverage, Observation and Transition are assumed to be
# defined elsewhere.


class Episodic:
    def __init__(
        self, num_of_actions, epsilon=0.0001, num_of_neighbours=10, cluster_distance=0.008,
        pseudo_counts=0.001, maximum_similarity=8, episodic_memory_capacity=30000):
        self.epsilon = epsilon
        self.num_of_neighbours = num_of_neighbours
        self.cluster_distance = cluster_distance
        self.pseudo_counts = pseudo_counts
        self.maximum_similarity = maximum_similarity

        self.episodic_memory = deque([], maxlen=episodic_memory_capacity)
        self.moving_average = MovingAverage()
        self.network = Embedding(num_of_actions)
        self.optimizer = tf.keras.optimizers.Adam()

    def reset(self):
        self.episodic_memory.clear()
        self.moving_average.reset()

    def get_similarity(self, observations: Observation, next_observations: Observation) -> Tuple[tf.Tensor, float]:
        likelihood, controllable_state = self.network(observations, next_observations)

        # Distances from the current controllable state to every state stored
        # in episodic memory; tf.nn.top_k returns the *largest* values, so
        # negate to select the k smallest distances.
        dists = tf.stack([tf.norm(controllable_state - state) for state in self.episodic_memory])
        k = min(self.num_of_neighbours, len(self.episodic_memory))
        dists = -tf.nn.top_k(-dists, k=k).values
        squared_dists = tf.math.square(dists)

        # Update the running mean of squared neighbour distances.
        for squared_dist in squared_dists:
            self.moving_average(squared_dist)

        # Normalize by the running mean and collapse very close neighbours
        # into a single cluster.
        normalized_dists = squared_dists / self.moving_average.prev_mean
        normalized_dists = tf.math.maximum(normalized_dists - self.cluster_distance, 0.0)

        # Inverse kernel: close neighbours contribute values near 1.
        kernels = self.epsilon / (normalized_dists + self.epsilon)

        # Pseudo-count similarity of the current state to its neighbours.
        similarity = tf.math.sqrt(tf.math.reduce_sum(kernels)) + self.pseudo_counts

        self.episodic_memory.append(controllable_state)

        if similarity > self.maximum_similarity:
            return likelihood, 0.0
        else:
            return likelihood, 1 / similarity
        
    def train(self, batch_transitions: List[Transition]):
        observations = []
        next_observations = []
        actions = []
        for transition in batch_transitions:
            # The last 6 observations give 5 (observation, next_observation)
            # pairs, aligned with the last 5 actions.
            _observations = transition.observations[-6:]
            _actions = transition.actions[-5:]

            for _observation, _next_observation, _action in zip(_observations[:-1], _observations[1:], _actions):
                observations.append(_observation)
                next_observations.append(_next_observation)
                actions.append(_action)

        observations = tf.convert_to_tensor(observations)
        next_observations = tf.convert_to_tensor(next_observations)
        actions = tf.convert_to_tensor(actions)
        with tf.GradientTape() as tape:
            # Inverse-dynamics objective: the embedding network predicts the
            # action that led from each observation to the next.
            likelihood, _ = self.network(observations, next_observations)
            loss = tf.reduce_mean(tf.keras.losses.mean_squared_error(actions, likelihood))
        grads = tape.gradient(loss, self.network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.network.trainable_variables))
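
Likewise, a minimal usage sketch of the episodic module, with hypothetical observations / next_observations batches; reset() is meant to be called at each episode boundary:

episodic = Episodic(num_of_actions=4)
episodic.reset()  # clear the episodic memory at the start of an episode

likelihood, episodic_reward = episodic.get_similarity(observations, next_observations)
# Combined with the lifelong modulator from Example 1:
intrinsic_reward = episodic_reward * tf.clip_by_value(
    lifelong.get_modulator(observations), 1.0, 5.0)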