Example #1
 def __init__(self, config, environment, policy):
     """
     :type config: app.config.model.ModelConfig
     :type environment: app.environments.environment.Environment
     :type policy: app.policies.policy.Policy
     """
     q_output_size = 1
     q_input_size = environment.observation_size + environment.action_size
     hidden_size = config.network.hidden_size
     number_of_hidden_layers = config.network.number_of_hidden_layers
     self.target_entropy = -environment.action_size
     self.log_alpha = torch.zeros(1, requires_grad=True)
     self.policy = policy
     self.reward_scale = config.reward_scale
     self.discount_factor = config.discount_factor
     self.exponential_weight = config.exponential_weight
     self.q1 = Network(q_input_size, hidden_size, q_output_size,
                       number_of_hidden_layers, nn.ReLU())
     self.q2 = Network(q_input_size, hidden_size, q_output_size,
                       number_of_hidden_layers, nn.ReLU())
     self.target_q1 = Network(q_input_size, hidden_size, q_output_size,
                              number_of_hidden_layers, nn.ReLU())
     self.target_q2 = Network(q_input_size, hidden_size, q_output_size,
                              number_of_hidden_layers, nn.ReLU())
     self.q_criterion = nn.MSELoss()
     self.alpha_optimizer = optim.Adam([self.log_alpha],
                                       lr=config.learning_rate_policy)
     self.policy_optimizer = optim.Adam(policy.parameters(),
                                        lr=config.learning_rate_policy)
     self.q1_optimizer = optim.Adam(self.q1.parameters(),
                                    lr=config.learning_rate_q)
     self.q2_optimizer = optim.Adam(self.q2.parameters(),
                                    lr=config.learning_rate_q)
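The `Network` constructed here (and again in the full `Model` class in Example #18) is not shown in these snippets. From the call sites, `Network(input_size, hidden_size, output_size, number_of_hidden_layers)` with an optional activation module, it appears to be a small fully connected torch module. A minimal sketch of that assumed interface, not the project's actual implementation:

import torch.nn as nn

class Network(nn.Module):
    """Plain feed-forward network; a guess at the interface used above."""

    def __init__(self, input_size, hidden_size, output_size,
                 number_of_hidden_layers, activation=None):
        super(Network, self).__init__()
        activation = activation if activation is not None else nn.ReLU()
        layers = [nn.Linear(input_size, hidden_size), activation]
        for _ in range(number_of_hidden_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(activation)
        layers.append(nn.Linear(hidden_size, output_size))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)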
Example #2
 def load_network(self, key):
     lc.log.info("network: {}".format(key))
     if key.startswith('gen'):
         # 'gen N ...' keys are generated on the fly; the digit at index 4
         # selects how many connections Network.generate should create
         self.network = Network.generate(int(key[4]))
     else:
         self.network = self.networks[key]
Example #4
def b2():
    a, b = Node(7 + 3, 1 + 3), Node(7 + 3, 4 + 3)
    c, d = Node(4 + 3, 4 + 3), Node(10 + 3, 4 + 3)
    e = Node(10 + 3, 7 + 3)
    return Network([a, b, c, d, e],
                   [Link(a, b), Link(b, c),
                    Link(b, d), Link(d, e)])
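`b2` above and most of the later examples use a different `Network` from the torch one in Example #1: a plain graph container built from `Node` and `Link` objects. Those classes are not shown either; judging from the call sites (`Node(x, y)` with an optional name, `Link(a, b)` with an optional label, `Network(nodes, links)` or `Network()` for the empty default, plus `Network.generate(n)` in Example #2), a minimal sketch of the assumed interface is:

class Node:
    """A point in the layout, optionally named (see l_network in Example #14)."""
    def __init__(self, x, y, name=None):
        self.x = x
        self.y = y
        self.name = name

class Link:
    """An undirected connection between two nodes, optionally labelled."""
    def __init__(self, node_a, node_b, label=None):
        self.node_a = node_a
        self.node_b = node_b
        self.label = label

class Network:
    """Container for nodes and links; Network() with no arguments is empty."""
    def __init__(self, nodes=None, links=None):
        self.nodes = nodes if nodes is not None else []
        self.links = links if links is not None else []

    @classmethod
    def generate(cls, number_of_connections):
        # Placeholder: the real procedural generator behind the 'gen N ...'
        # keys in load_network is not shown in these examples.
        raise NotImplementedError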
Example #5
 def __init__(self, config, environment):
     """
     :type config: app.config.policies.CategoricalPolicyConfig
     :type environment: app.config.environments.EnvironmentConfig
     """
     super(Policy, self).__init__()
     input_size = environment.observation_size
     output_size = len(config.actions)
     hidden_size = config.network.hidden_size
     number_of_hidden_layers = config.network.number_of_hidden_layers
     self.actions = torch.tensor(config.actions)
     self.network = Network(input_size, hidden_size, output_size,
                            number_of_hidden_layers)
Example #6
 def __init__(self, config, environment, deterministic=False):
     """
     :type config: app.config.policies.GaussianPolicyConfig
     :type environment: app.config.environments.EnvironmentConfig
     """
     super(Policy, self).__init__()
     input_size = environment.observation_size
     output_size = 2  # mean and log(std)
     hidden_size = config.network.hidden_size
     number_of_hidden_layers = config.network.number_of_hidden_layers
     self.deterministic = deterministic
     self.network = Network(input_size, hidden_size, output_size,
                            number_of_hidden_layers)
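The Gaussian policy's forward pass is not included here, but with `output_size = 2` the two network outputs are a mean and a log standard deviation. A hedged sketch of the usual SAC-style sampling step (a reparameterized sample followed by tanh squashing with its log-probability correction); this is the standard construction, not necessarily the exact code in `app.policies`:

import torch

network_output = torch.randn(5, 2)               # batch of 5: [mean, log(std)]
mean, log_std = network_output.chunk(2, dim=1)
std = log_std.exp()
normal = torch.distributions.Normal(mean, std)
raw_action = normal.rsample()                    # reparameterized, keeps gradients
action = torch.tanh(raw_action)                  # squash to (-1, 1)
# log-probability with the change-of-variables correction for tanh
log_probability = (normal.log_prob(raw_action)
                   - torch.log(1.0 - action.pow(2) + 1e-6)).sum(dim=1, keepdim=True)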
Example #7
def triangle_network():
    node_a = Node(3, 3)
    node_b = Node(12, 6)
    node_c = Node(6, 12)
    node_e = Node(6, 6)
    node_f = Node(14, 14)

    link_1 = Link(node_a, node_b, 'AB')
    link_2 = Link(node_b, node_c, 'BC')
    link_3 = Link(node_c, node_a, 'AC')

    return Network([node_a, node_b, node_c, node_e, node_f],
                   [link_1, link_2, link_3])
Example #8
def all_networks():
    return {
        # "one node": one_node_network(),
        # "two nodes": two_nodes_network(),
        # "triangle": triangle_network(),
        # "horizontal line": horizontal_network(),
        # "vertical line": vertical_network(),
        # "whole network": whole_network(),
        "default": Network(),
        "l": l_network(),
        "gen 0 connection": None,
        "gen 1 connection": None,
        "gen 2 connections": None,
        "gen 3 connections": None
    }
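The `None` values for the 'gen …' entries are never read: `load_network` in Example #2 intercepts any key starting with 'gen' and calls `Network.generate(int(key[4]))`, so only the remaining keys are looked up in this dictionary. For instance:

networks = all_networks()
default_network = networks['default']        # the empty Network()
l_shape = networks['l']                      # Paris / Lille / Strasbourg network
# 'gen 2 connections'[4] == '2', so load_network('gen 2 connections')
# ends up calling Network.generate(2) instead of reading the dictionary.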
Example #9
def whole_network():
    a, b, c, d = Node(3, 4), Node(10.5, 4), Node(6, 8), Node(10.5, 8)
    e, f, g, h = Node(16.5, 8), Node(18, 6), Node(3, 16), Node(13.5, 12)
    i, j = Node(16.5, 16), Node(3, 12)
    return Network([a, b, c, d, e, f, g, h, i, j], [
        Link(a, b),
        Link(a, c),
        Link(a, j),
        Link(b, d),
        Link(c, d),
        Link(d, h),
        Link(d, e),
        Link(e, f),
        Link(h, i),
        Link(j, g),
        Link(j, h),
        Link(h, e)
    ])
Example #10
def exp1_network():
    home = Node(5, 8)
    cinema = Node(8, 7)
    police = Node(7, 3)
    market = Node(14, 5)
    city_hall = Node(15, 10)
    school = Node(7, 12)
    tower = Node(3, 15)

    links = [
        Link(home, cinema),
        Link(home, police),
        Link(cinema, police),
        Link(police, market),
        Link(market, city_hall),
        Link(school, city_hall),
        Link(home, school)
    ]

    return Network([home, cinema, police, market, city_hall, school, tower],
                   links)
Example #11
def conc1_network():
    sophie = Node(6, 2)
    mathiew = Node(2, 6)
    jean = Node(10, 6)
    martin = Node(2, 10)
    harold = Node(10, 10)
    helene = Node(14, 10)
    julie = Node(10, 14)
    marie = Node(14, 14)

    links = [
        Link(sophie, mathiew),
        Link(sophie, jean),
        Link(jean, mathiew),
        Link(martin, mathiew),
        Link(jean, harold),
        Link(jean, helene),
        Link(mathiew, harold),
        Link(harold, julie),
        Link(helene, marie),
        Link(julie, marie)
    ]
    return Network(
        [sophie, mathiew, jean, martin, harold, helene, julie, marie], links)
Example #12
def w():
    a, b = Node(10, 10), Node(8.5, 10)
    return Network([a, b], [Link(a, b)])
Example #13
def s():
    a, b = Node(10, 10), Node(10, 11.5)
    return Network([a, b], [Link(a, b)])
Example #14
def l_network():
    a = Node(10, 10, 'Paris')
    b, c = Node(10, 7, 'Lille'), Node(13, 10, 'Strasbourg')
    return Network([a, b, c], [Link(a, b), Link(a, c)])
Example #15
def f2():
    c, d = Node(10, 3), Node(10, 6)
    e, f, g = Node(8 + 5, 6), Node(3 + 5, 8), Node(6 + 5, 8)
    return Network([c, d, e, f, g],
                   [Link(c, d), Link(d, e),
                    Link(d, f), Link(f, g)])
Example #16
def d2():
    c, d = Node(4 + 3, 4), Node(6 + 3, 6)
    e, f, g = Node(6 + 3, 9), Node(8 + 3, 7), Node(9 + 3, 9)
    return Network([c, d, e, f, g],
                   [Link(c, d), Link(d, e),
                    Link(e, f), Link(e, g)])
Example #17
def e2():
    c, d = Node(4 + 4, 4), Node(6 + 4, 6)
    e, f, g = Node(6 + 4, 9), Node(3 + 4, 9), Node(9 + 4, 9)
    return Network([c, d, e, f, g],
                   [Link(c, d), Link(d, e),
                    Link(e, f), Link(e, g)])
Example #18
class Model:
    """
    Soft Actor-Critic model.
    """
    def __init__(self, config, environment, policy):
        """
        :type config: app.config.model.ModelConfig
        :type environment: app.environments.environment.Environment
        :type policy: app.policies.policy.Policy
        """
        q_output_size = 1
        q_input_size = environment.observation_size + environment.action_size
        hidden_size = config.network.hidden_size
        number_of_hidden_layers = config.network.number_of_hidden_layers
        self.target_entropy = -environment.action_size
        self.log_alpha = torch.zeros(1, requires_grad=True)
        self.policy = policy
        self.reward_scale = config.reward_scale
        self.discount_factor = config.discount_factor
        self.exponential_weight = config.exponential_weight
        self.q1 = Network(q_input_size, hidden_size, q_output_size,
                          number_of_hidden_layers, nn.ReLU())
        self.q2 = Network(q_input_size, hidden_size, q_output_size,
                          number_of_hidden_layers, nn.ReLU())
        self.target_q1 = Network(q_input_size, hidden_size, q_output_size,
                                 number_of_hidden_layers, nn.ReLU())
        self.target_q2 = Network(q_input_size, hidden_size, q_output_size,
                                 number_of_hidden_layers, nn.ReLU())
        self.q_criterion = nn.MSELoss()
        self.alpha_optimizer = optim.Adam([self.log_alpha],
                                          lr=config.learning_rate_policy)
        self.policy_optimizer = optim.Adam(policy.parameters(),
                                           lr=config.learning_rate_policy)
        self.q1_optimizer = optim.Adam(self.q1.parameters(),
                                       lr=config.learning_rate_q)
        self.q2_optimizer = optim.Adam(self.q2.parameters(),
                                       lr=config.learning_rate_q)

    def train_batch(self, observations, next_observations, actions, rewards,
                    terminals):
        """
        Forward pass.
        Assumes inputs are torch tensors.
        """
        alpha = self.log_alpha.exp()
        alpha_detached = alpha.detach()

        policy_actions, policy_log_probability = self.policy(observations)
        policy_next_actions, policy_next_log_probability = self.policy(
            next_observations)

        # concatenate observations and corresponding actions
        observation_actions = torch.cat((observations, actions), dim=1)
        observation_policy_actions = torch.cat((observations, policy_actions),
                                               dim=1)
        next_observation_policy_next_actions = torch.cat(
            (next_observations, policy_next_actions), dim=1)

        # q-values
        q1_policy_actions = self.q1(observation_policy_actions)
        q2_policy_actions = self.q2(observation_policy_actions)
        q_policy_actions = torch.min(q1_policy_actions, q2_policy_actions)
        q1_actions = self.q1(observation_actions)
        q2_actions = self.q2(observation_actions)

        # target q-values
        target_q1_policy_next_actions = self.target_q1(
            next_observation_policy_next_actions)
        target_q2_policy_next_actions = self.target_q2(
            next_observation_policy_next_actions)
        target_q_policy_next_actions = torch.min(
            target_q1_policy_next_actions, target_q2_policy_next_actions)
        value_next_observation = (target_q_policy_next_actions -
                                  alpha_detached * policy_next_log_probability)
        q_target = self.reward_scale * rewards + (
            1.0 - terminals) * self.discount_factor * value_next_observation

        # losses
        q1_loss = self.q_criterion(q1_actions, q_target.detach())
        q2_loss = self.q_criterion(q2_actions, q_target.detach())
        policy_loss = (alpha_detached * policy_log_probability -
                       q_policy_actions).mean()
        alpha_loss = -(
            alpha *
            (policy_log_probability + self.target_entropy).detach()).mean()

        # optimize
        self.optimize(self.q1_optimizer, q1_loss)
        self.optimize(self.q2_optimizer, q2_loss)
        self.optimize(self.policy_optimizer, policy_loss)
        self.optimize(self.alpha_optimizer, alpha_loss)
        self.update_exponential_moving_target(self.q1, self.target_q1)
        self.update_exponential_moving_target(self.q2, self.target_q2)

        return (policy_loss.detach().numpy(), q1_loss.detach().numpy(),
                q2_loss.detach().numpy(), alpha_loss.detach().numpy())

    def get_action(self, observation):
        """
        Computes next action.
        Assumes input is a numpy array.
        """
        return self.policy.get_action(observation)

    def optimize(self, optimizer, loss):
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    def copy_parameters(self, source, target):
        for source_param, target_param in zip(source.parameters(),
                                              target.parameters()):
            target_param.data.copy_(source_param.data)

    def update_exponential_moving_target(self, q, target):
        for q_param, target_param in zip(q.parameters(), target.parameters()):
            q_contribution = self.exponential_weight * q_param.data
            target_contribution = (1.0 -
                                   self.exponential_weight) * target_param.data
            target_param_new = q_contribution + target_contribution
            target_param.data.copy_(target_param_new)

    def eval_mode(self):
        self.policy.eval()
        self.q1.eval()
        self.q2.eval()
        self.target_q1.eval()
        self.target_q2.eval()

    def train_mode(self):
        self.policy.train()
        self.q1.train()
        self.q2.train()
        self.target_q1.train()
        self.target_q2.train()

    def load(self, path):
        self.policy.load_state_dict(torch.load(os.path.join(path,
                                                            'policy.pt')))
        self.q1.load_state_dict(torch.load(os.path.join(path, 'q1.pt')))
        self.q2.load_state_dict(torch.load(os.path.join(path, 'q2.pt')))
        self.copy_parameters(self.q1, self.target_q1)
        self.copy_parameters(self.q2, self.target_q2)

    def save(self, path):
        torch.save(self.policy.state_dict(), os.path.join(path, 'policy.pt'))
        torch.save(self.q1.state_dict(), os.path.join(path, 'q1.pt'))
        torch.save(self.q2.state_dict(), os.path.join(path, 'q2.pt'))
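The target networks are not copied over periodically; `update_exponential_moving_target` moves each target parameter a fraction `exponential_weight` of the way toward the corresponding online parameter after every batch (Polyak averaging). A small self-contained check of that rule, using plain `nn.Linear` modules in place of the project's `Network`; the 0.005 coefficient is a typical choice, not a value read from the config:

import torch.nn as nn

exponential_weight = 0.005                    # assumed value, not from the source config

q = nn.Linear(3, 1)
target_q = nn.Linear(3, 1)

# same update rule as Model.update_exponential_moving_target
for q_param, target_param in zip(q.parameters(), target_q.parameters()):
    new_value = (exponential_weight * q_param.data
                 + (1.0 - exponential_weight) * target_param.data)
    target_param.data.copy_(new_value)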
Example #19
def one_node_network():
    return Network([Node(5, 5)], [])
Example #20
def nw():
    a, b = Node(10, 10), Node(9, 9)
    return Network([a, b], [Link(a, b)])
Example #21
def horizontal_network():
    a = Node(6, 8)
    b = Node(10, 8)
    return Network([a, b], [Link(a, b)])
Example #22
def a2():
    c, d = Node(10, 4), Node(8, 6)
    e, f, g = Node(8, 9), Node(10, 7), Node(11, 9)
    return Network([c, d, e, f, g],
                   [Link(c, d), Link(d, e),
                    Link(e, g), Link(e, f)])
Example #23
def vertical_network():
    a = Node(10, 6)
    b = Node(10, 10)
    return Network([a, b], [Link(a, b)])
Example #24
def c2():
    c, d = Node(10, 4), Node(8, 6)
    e, f, g = Node(10, 8), Node(8, 9), Node(5, 9)
    return Network([c, d, e, f, g],
                   [Link(c, d), Link(d, e),
                    Link(d, f), Link(f, g)])
Example #25
def se():
    a, b = Node(10, 10), Node(11, 11)
    return Network([a, b], [Link(a, b)])
Example #26
def two_nodes_network():
    return Network([Node(5, 5), Node(10, 10)], [])
Example #27
def f1():
    c, d = Node(7, 6), Node(10, 6)
    e, f, g = Node(13, 6), Node(8, 8), Node(11, 8)
    return Network([c, d, e, f, g],
                   [Link(c, d), Link(d, e),
                    Link(d, f), Link(f, g)])