def __init__(
        self,
        in_channels,
        convnet_out_size,
        out_dim,
        num_modules,
        num_agents,
):
    nn.Module.__init__(self)
    print('Per task exclusive: all fc')
    # shared convolutional feature extractor
    self.convolutions = nn.Sequential(
        SimpleConvNetBlock(in_channels, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        nn.BatchNorm2d(32),
        Flatten())
    # alternative: reward routing decisions by classification correctness
    # self._loss_layer = Loss(torch.nn.MSELoss(), CorrectClassifiedReward(), discounting=1.)
    self._loss_layer = Loss(torch.nn.MSELoss(), NegLossReward(), discounting=1.)
    # per-task hard assignment, then one module selection per fully connected layer
    self.fc_layers = Sequential(
        PerTaskAssignment(),
        Selection(*[LinearWithRelu(convnet_out_size, 48) for _ in range(num_modules)]),
        Selection(*[LinearWithRelu(48, 48) for _ in range(num_modules)]),
        Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)]),
    )
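# The helper modules used above (Flatten, LinearWithRelu) come from the surrounding
# routing-networks codebase and are not defined here. A minimal sketch of what they
# presumably look like, assuming Flatten collapses all non-batch dimensions and
# LinearWithRelu is simply a Linear layer followed by a ReLU (sketch only; the real
# definitions live in the library):
import torch.nn as nn  # assumed import; the original file presumably has this already

class Flatten(nn.Module):
    def forward(self, x):
        # keep the batch dimension, flatten everything else
        return x.view(x.size(0), -1)

class LinearWithRelu(nn.Sequential):
    def __init__(self, in_features, out_features):
        super().__init__(nn.Linear(in_features, out_features), nn.ReLU())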
def __init__(self, decision_maker, in_channels, convnet_out_size, out_dim, num_modules, num_agents):
    nn.Module.__init__(self)
    # shared convolutional feature extractor
    self.convolutions = nn.Sequential(
        SimpleConvNetBlock(in_channels, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        SimpleConvNetBlock(32, 32, 3),
        nn.BatchNorm2d(32),
        Flatten())
    self._loss_func = Loss(torch.nn.MSELoss(), CorrectClassifiedReward(), discounting=1.)
    self._initialization = Initialization()
    self._per_task_assignment = PerTaskAssignment()
    # one tabular decision maker per routed fc layer, each with an additional collaboration reward
    self._decision_1 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='tabular',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_2 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='tabular',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_3 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='tabular',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    # candidate modules selectable at each routed layer
    self._selection_1 = Selection(*[LinearWithRelu(convnet_out_size, 48) for _ in range(num_modules)])
    self._selection_2 = Selection(*[LinearWithRelu(48, 48) for _ in range(num_modules)])
    self._selection_3 = Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)])
def __init__(
        self,
        in_channels,
        convnet_out_size,
        out_dim,
        num_modules,
        num_agents,
):
    PerTask_all_fc.__init__(
        self,
        in_channels,
        convnet_out_size,
        out_dim,
        num_modules,
        num_agents,
    )
    print('Per task exclusive: last fc')
    self.convolutions = nn.Sequential(
        self.convolutions,
        Flatten(),
        LinearWithRelu(convnet_out_size, 48),
        LinearWithRelu(48, 48))
    self._loss_layer = Loss(torch.nn.MSELoss(), CorrectClassifiedReward(), discounting=1.)
    self.fc_layers = Sequential(
        PerTaskAssignment(),
        Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)]),
    )
def _create_routing(self, routing_agent, num_agents, exploration, policy_storage_type, detach,
                    approx_hidden_dims, additional_reward_func, dimensionality_defs):
    list_type = nn.ModuleList if not self.recurrent else FakeFlatModuleList
    effective_width = self.routing_width if not self.recurrent else self.routing_width + 1  # for termination action
    effective_depth = self.routing_depth if not self.recurrent else 1
    base_selection = [] if not self.recurrent else [Identity()]
    router = list_type([
        routing_agent(
            num_selections=effective_width,
            in_features=dimensionality_defs[i],
            num_agents=num_agents,
            exploration=exploration,
            policy_storage_type=policy_storage_type,
            detach=detach,
            approx_hidden_dims=approx_hidden_dims,
            additional_reward_func=additional_reward_func
        ) for i in range(effective_depth)
    ])
    selection = list_type([
        Selection(*(base_selection + [  # need base selection for termination action
            nn.Linear(dimensionality_defs[i], dimensionality_defs[i + 1])
            for _ in range(effective_width)
        ])) for i in range(effective_depth)
    ])
    return router, selection
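# Hypothetical illustration (not from the original source): the selection layers above map
# dimensionality_defs[i] -> dimensionality_defs[i + 1] for i in range(effective_depth), so
# dimensionality_defs needs at least effective_depth + 1 entries. For a three-layer routed
# fc stack like the constructors in this file, it would look roughly like this
# (placeholder sizes):
example_dimensionality_defs = [512, 48, 48, 10]  # [convnet_out_size, hidden, hidden, out_dim]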
def __init__(self, decision_maker, in_channels, convnet_out_size, out_dim, num_modules, num_agents):
    PerTask_all_fc.__init__(self, in_channels, convnet_out_size, out_dim, num_modules, num_agents)
    print('Routing Networks: all fc')
    self._initialization = Initialization()
    # self._per_task_assignment = PerTaskAssignment()
    self._per_task_assignment = decision_maker(
        num_agents, convnet_out_size, num_agents=1, policy_storage_type='approx',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_1 = decision_maker(
        num_modules, convnet_out_size, num_agents=num_agents, policy_storage_type='approx',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_2 = decision_maker(
        num_modules, 48, num_agents=num_agents, policy_storage_type='approx',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._decision_3 = decision_maker(
        num_modules, 48, num_agents=num_agents, policy_storage_type='approx',
        additional_reward_func=CollaborationReward(reward_ratio=0.3, num_actions=num_modules))
    self._selection_1 = Selection(*[LinearWithRelu(convnet_out_size, 48) for _ in range(num_modules)])
    self._selection_2 = Selection(*[LinearWithRelu(48, 48) for _ in range(num_modules)])
    self._selection_3 = Selection(*[nn.Linear(48, out_dim) for _ in range(num_modules)])