Example #1
def __init__(
    self,
    in_channels,
    convnet_out_size,
    out_dim,
    num_modules,
    num_agents,
):
    nn.Module.__init__(self)
    print('Per task exclusive: all fc')
    # Shared convolutional trunk; Flatten() produces a vector of size convnet_out_size.
    self.convolutions = nn.Sequential(
        SimpleConvNetBlock(in_channels, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        nn.BatchNorm2d(32),
        Flatten(),
    )
    # self._loss_layer = Loss(torch.nn.MSELoss(), CorrectClassifiedReward(), discounting=1.)
    self._loss_layer = Loss(torch.nn.MSELoss(),
                            NegLossReward(),
                            discounting=1.)
    # All fully connected layers are task-exclusive: PerTaskAssignment routes each
    # sample to the module of its task inside every Selection.
    self.fc_layers = Sequential(
        PerTaskAssignment(),
        Selection(*[
            LinearWithRelu(convnet_out_size, 48)
            for _ in range(num_modules)
        ]),
        Selection(*[LinearWithRelu(48, 48) for _ in range(num_modules)]),
        Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)]),
    )
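A minimal usage sketch for this constructor, assuming the enclosing class is PerTask_all_fc (the name referenced in Examples #3 and #5); every value below is a placeholder rather than something taken from the source.

# Hypothetical instantiation; PerTask_all_fc and all values are placeholders.
model = PerTask_all_fc(
    in_channels=3,         # e.g. RGB input
    convnet_out_size=288,  # must match the flattened output of the conv trunk
    out_dim=10,            # output dimensionality of each task head
    num_modules=5,         # one FC module per task inside each Selection
    num_agents=5,          # number of tasks/agents
)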
Example #2
def __init__(self, decision_maker, in_channels, convnet_out_size, out_dim,
             num_modules, num_agents):
    nn.Module.__init__(self)
    # Shared convolutional trunk.
    self.convolutions = nn.Sequential(
        SimpleConvNetBlock(in_channels, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        nn.BatchNorm2d(32),
        Flatten()
    )
    self._loss_func = Loss(torch.nn.MSELoss(), CorrectClassifiedReward(), discounting=1.)

    self._initialization = Initialization()
    self._per_task_assignment = PerTaskAssignment()

    # One learned routing decision per fully connected layer, each choosing among
    # num_modules candidate modules with a tabular policy.
    self._decision_1 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='tabular',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_2 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='tabular',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_3 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='tabular',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))

    # Candidate modules for each layer; the decisions above pick one per sample.
    self._selection_1 = Selection(*[LinearWithRelu(convnet_out_size, 48) for _ in range(num_modules)])
    self._selection_2 = Selection(*[LinearWithRelu(48, 48) for _ in range(num_modules)])
    self._selection_3 = Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)])
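A sketch of how the decision_maker factory argument might be supplied. RoutingAgent stands in for whatever decision-layer class the surrounding framework provides (it must accept the positional (num_selections, in_features) arguments and the keywords used above), and RoutedAllFC is a placeholder name for the class defining this __init__; neither name comes from the source.

# Hypothetical wiring; RoutingAgent and RoutedAllFC are placeholder names.
model = RoutedAllFC(
    decision_maker=RoutingAgent,
    in_channels=1,
    convnet_out_size=288,   # placeholder; must match the conv trunk's flattened output
    out_dim=10,
    num_modules=5,
    num_agents=5,
)

If the agent class takes extra options (e.g. an exploration rate), functools.partial can pre-bind them before the factory is passed in.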
Example #3
def __init__(
    self,
    in_channels,
    convnet_out_size,
    out_dim,
    num_modules,
    num_agents,
):
    PerTask_all_fc.__init__(
        self,
        in_channels,
        convnet_out_size,
        out_dim,
        num_modules,
        num_agents,
    )
    print('Per task exclusive: last fc')
    # Shared trunk: the parent's conv stack followed by two shared FC layers,
    # so only the final FC layer below remains task-exclusive.
    self.convolutions = nn.Sequential(self.convolutions, Flatten(),
                                      LinearWithRelu(convnet_out_size, 48),
                                      LinearWithRelu(48, 48))
    self._loss_layer = Loss(torch.nn.MSELoss(),
                            CorrectClassifiedReward(),
                            discounting=1.)
    self.fc_layers = Sequential(
        PerTaskAssignment(),
        Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)]),
    )
Example #4
def _create_routing(self, routing_agent, num_agents, exploration, policy_storage_type, detach,
                    approx_hidden_dims, additional_reward_func, dimensionality_defs):
    list_type = nn.ModuleList if not self.recurrent else FakeFlatModuleList
    effective_width = self.routing_width if not self.recurrent else self.routing_width + 1  # for termination action
    effective_depth = self.routing_depth if not self.recurrent else 1
    base_selection = [] if not self.recurrent else [Identity()]
    # One routing agent per (effective) depth, each choosing among effective_width actions.
    router = list_type([
        routing_agent(
            num_selections=effective_width,
            in_features=dimensionality_defs[i],
            num_agents=num_agents,
            exploration=exploration,
            policy_storage_type=policy_storage_type,
            detach=detach,
            approx_hidden_dims=approx_hidden_dims,
            additional_reward_func=additional_reward_func
        ) for i in range(effective_depth)
    ])
    selection = list_type([
        Selection(*(base_selection + [  # need base selection for termination action
            nn.Linear(dimensionality_defs[i], dimensionality_defs[i + 1])
            for _ in range(effective_width)
        ]))
        for i in range(effective_depth)
    ])
    return router, selection
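A sketch of calling this helper from an owning model's __init__; RoutingAgent and all values below are placeholders. The one contract implied by the code is that the Selection at depth i maps dimensionality_defs[i] to dimensionality_defs[i + 1], so the list needs one more entry than the effective depth.

# Hypothetical call; everything except the parameter list is a placeholder.
self.router, self.selection = self._create_routing(
    routing_agent=RoutingAgent,          # stand-in decision-layer class
    num_agents=5,
    exploration=0.1,
    policy_storage_type='approx',
    detach=True,
    approx_hidden_dims=[64],
    additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=5),
    dimensionality_defs=[288, 48, 48, 10],  # effective_depth + 1 widths
)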
Example #5
def __init__(self, decision_maker, in_channels, convnet_out_size, out_dim,
             num_modules, num_agents):
    PerTask_all_fc.__init__(self, in_channels, convnet_out_size, out_dim,
                            num_modules, num_agents)
    print('Routing Networks:   all fc')
    self._initialization = Initialization()
    # self._per_task_assignment = PerTaskAssignment()
    # Learned task-to-agent assignment, replacing the hard PerTaskAssignment above.
    self._per_task_assignment = decision_maker(
        num_agents,
        convnet_out_size,
        num_agents=1,
        policy_storage_type='approx',
        additional_reward_func=CollaborationReward(
            reward_ratio=0.3, num_actions=num_modules))

    # One routing decision per FC layer; in_features matches the width of the
    # representation at that point (convnet_out_size, then 48, then 48).
    self._decision_1 = decision_maker(
        num_modules,
        convnet_out_size,
        num_agents=num_agents,
        policy_storage_type='approx',
        additional_reward_func=CollaborationReward(
            reward_ratio=0.3, num_actions=num_modules))
    self._decision_2 = decision_maker(
        num_modules,
        48,
        num_agents=num_agents,
        policy_storage_type='approx',
        additional_reward_func=CollaborationReward(
            reward_ratio=0.3, num_actions=num_modules))
    self._decision_3 = decision_maker(
        num_modules,
        48,
        num_agents=num_agents,
        policy_storage_type='approx',
        additional_reward_func=CollaborationReward(
            reward_ratio=0.3, num_actions=num_modules))

    # Candidate modules per layer; each decision above selects one per sample.
    self._selection_1 = Selection(
        *[LinearWithRelu(convnet_out_size, 48) for _ in range(num_modules)])
    self._selection_2 = Selection(
        *[LinearWithRelu(48, 48) for _ in range(num_modules)])
    self._selection_3 = Selection(
        *[nn.Linear(48, out_dim) for _ in range(num_modules)])