class CDPNet(ESNetwork):
    """Eligibility network with reward-modulated plastic weights and a
    structurally prunable recurrent eligibility layer."""

    def __init__(self, input_size, output_size, noise_std=0.01,
                 prune_noise_std=0.0015, action_noise_std=None,
                 num_eps_samples=64, sample_type="antithetic"):
        """
        Eligibility Network with reward modulated plastic weights

        :param input_size: (int) size of observation space
        :param output_size: (int) size of action space
        :param noise_std: (float) ES parameter-noise standard deviation
        :param prune_noise_std: (float) noise std applied to prunable parameters
        :param action_noise_std: (float or None) std of Gaussian exploration
            noise added to the action output; None disables action noise
        :param num_eps_samples: (int) number of epsilon samples (population size)
        :param sample_type: (str) network noise sampling type
        """
        self.params = list()        # list of parameters to update
        self.prune_params = list()  # parameters subject to structural pruning
        self.input_size = input_size            # observation space dimensionality
        self.output_size = output_size          # action space dimensionality
        self.action_noise_std = action_noise_std  # action noise standard deviation

        recur_ff1_meta = {
            "clip": 2, "activation": identity,
            "input_size": input_size, "output_size": 16
        }
        self.recur_plastic_ff1 = NetworkModule("linear", recur_ff1_meta)
        self.params.append(self.recur_plastic_ff1)

        # Hidden layer carries the prunable, reward-modulated recurrent weights;
        # activations are saved for the eligibility-trace updates.
        recur_ff2_meta = {
            "clip": 2, "activation": identity,
            "input_size": 16, "output_size": 16
        }
        self.recur_plastic_ff2 = NetworkModule(
            "structural_neuromod_recurrent_eligibility",
            recur_ff2_meta, save_activations=True)
        self.params.append(self.recur_plastic_ff2)
        self.prune_params.append(self.recur_plastic_ff2.prune_parameters)

        recur_ff3_meta = {
            "clip": 2, "activation": identity,
            "input_size": 16, "output_size": output_size
        }
        self.recur_plastic_ff3 = NetworkModule("linear", recur_ff3_meta)
        self.params.append(self.recur_plastic_ff3)

        super(CDPNet, self).__init__(
            noise_std=noise_std, prune_params=self.prune_params,
            params=self.params, num_eps_samples=num_eps_samples,
            sample_type=sample_type, prune_noise_std=prune_noise_std)

    def reset(self):
        """
        Reset inter-lifetime network parameters

        :return: None
        """
        for _param in self.params:
            _param.reset()

    def forward(self, x):
        """
        Forward propagate input value

        :param x: (ndarray) state input
        :return: (ndarray) post synaptic activity at final layer
        """
        pre_synaptic_ff1 = x
        post_synaptic_ff1 = np.tanh(
            self.recur_plastic_ff1.forward(pre_synaptic_ff1))
        pre_synaptic_ff2 = post_synaptic_ff1
        post_synaptic_ff2 = self.recur_plastic_ff2.forward(pre_synaptic_ff2)
        pre_synaptic_ff3 = post_synaptic_ff2
        post_synaptic_ff3 = self.recur_plastic_ff3.forward(pre_synaptic_ff3)
        # BUG FIX: the original added the noise in place to the *input* x
        # after propagation was complete, which mutated the caller's array
        # and had no effect on the returned action. Apply the exploration
        # noise to the action output instead.
        if self.action_noise_std is not None:
            post_synaptic_ff3 = post_synaptic_ff3 + \
                np.random.randn(*post_synaptic_ff3.shape) * self.action_noise_std
        return post_synaptic_ff3
class CDPNet(ESNetwork):
    """Eligibility network with reward-modulated plastic weights: a fixed
    linear encoder feeding a neuromodulated recurrent output layer."""

    def __init__(self, input_size, output_size, noise_std=0.01,
                 action_noise_std=None, num_eps_samples=64,
                 sample_type="antithetic"):
        """
        Eligibility Network with reward modulated plastic weights

        :param input_size: (int) size of observation space
        :param output_size: (int) size of action space
        :param noise_std: (float) ES parameter-noise standard deviation
        :param action_noise_std: (float or None) std of Gaussian exploration
            noise added to the action output; None disables action noise
        :param num_eps_samples: (int) number of epsilon samples (population size)
        :param sample_type: (str) network noise sampling type
        """
        self.params = list()  # list of parameters to update
        self.input_size = input_size            # observation space dimensionality
        self.output_size = output_size          # action space dimensionality
        self.action_noise_std = action_noise_std  # action noise standard deviation
        self.ff_connectivity_type = "linear"    # connectivity type -- eligibility

        recur_ff1_meta = {
            "clip": 1, "activation": identity,
            "input_size": input_size, "output_size": 32
        }
        self.recur_plastic_ff1 = NetworkModule("linear", recur_ff1_meta)
        self.params.append(self.recur_plastic_ff1)

        recur_ff3_meta = {
            "clip": 1, "activation": identity,
            "input_size": 32, "output_size": output_size
        }
        self.recur_plastic_ff3 = NetworkModule(
            "simple_neuromod_recurrent", recur_ff3_meta)
        self.params.append(self.recur_plastic_ff3)

        super(CDPNet, self).__init__(
            noise_std=noise_std, params=self.params,
            num_eps_samples=num_eps_samples, sample_type=sample_type)

    def reset(self):
        """
        Reset inter-lifetime network parameters

        :return: None
        """
        for _param in self.params:
            _param.reset()

    def forward(self, x):
        """
        Forward propagate input value

        :param x: (ndarray) state input
        :return: (ndarray) post synaptic activity at final layer
        """
        pre_synaptic_ff1 = x
        post_synaptic_ff1 = np.tanh(
            self.recur_plastic_ff1.forward(pre_synaptic_ff1))
        pre_synaptic_ff3 = post_synaptic_ff1
        post_synaptic_ff3 = self.recur_plastic_ff3.forward(pre_synaptic_ff3)
        # BUG FIX: the original added the noise in place to the *input* x
        # after propagation was complete, which mutated the caller's array
        # and had no effect on the returned action. Apply the exploration
        # noise to the action output instead.
        if self.action_noise_std is not None:
            post_synaptic_ff3 = post_synaptic_ff3 + \
                np.random.randn(*post_synaptic_ff3.shape) * self.action_noise_std
        return post_synaptic_ff3
class CDPNet(ESNetwork):
    """Three-layer eligibility network: a linear encoder followed by two
    neuromodulated plastic layers, trained through evolution strategies."""

    def __init__(self, input_size, output_size, noise_std=0.01,
                 action_noise_std=None, num_eps_samples=64,
                 sample_type="antithetic"):
        """
        Eligibility Network with reward modulated plastic weights

        :param input_size: (int) size of observation space
        :param output_size: (int) size of action space
        :param noise_std: (float) ES parameter-noise standard deviation
        :param action_noise_std: (float or None) stored action-noise std
        :param num_eps_samples: (int) number of epsilon samples (population size)
        :param sample_type: (str) network noise sampling type
        """
        self.input_size = input_size            # observation space dimensionality
        self.output_size = output_size          # action space dimensionality
        self.action_noise_std = action_noise_std  # action noise standard deviation
        self.ff_connectivity_type = "linear"    # connectivity type -- eligibility

        # All layers share the same clip/activation settings.
        def _layer_meta(n_in, n_out):
            return {"clip": 1, "activation": identity,
                    "input_size": n_in, "output_size": n_out}

        self.recur_plastic_ff1 = NetworkModule(
            self.ff_connectivity_type, _layer_meta(input_size, 512))
        self.recur_plastic_ff2 = NetworkModule(
            "simple_neuromod", _layer_meta(512, 512))
        self.recur_plastic_ff3 = NetworkModule(
            "simple_neuromod", _layer_meta(512, output_size))

        # list of parameters to update
        self.params = [self.recur_plastic_ff1,
                       self.recur_plastic_ff2,
                       self.recur_plastic_ff3]

        super(CDPNet, self).__init__(
            noise_std=noise_std, params=self.params,
            num_eps_samples=num_eps_samples, sample_type=sample_type)

    def reset(self):
        """
        Reset inter-lifetime network parameters

        :return: None
        """
        for module in self.params:
            module.reset()

    def forward(self, x, act=False):
        """
        Forward propagate input value

        :param x: (ndarray) state input
        :param act: (bool) unused flag, kept for caller compatibility
        :return: (ndarray) post synaptic activity at final layer
        """
        hidden = np.tanh(self.recur_plastic_ff1.forward(x))
        hidden = np.tanh(self.recur_plastic_ff2.forward(hidden))
        return self.recur_plastic_ff3.forward(hidden)