Example #1
0
class CDPNet(ESNetwork):
    """
    Three-module network built from NetworkModule layers:
    "linear" (input_size -> 16), "structural_neuromod_recurrent_eligibility"
    (16 -> 16) and "linear" (16 -> output_size).

    The middle module additionally contributes its ``prune_parameters`` to
    ``self.prune_params``, which is handed to ESNetwork alongside ``self.params``.
    """

    def __init__(self,
                 input_size,
                 output_size,
                 noise_std=0.01,
                 prune_noise_std=0.0015,
                 action_noise_std=None,
                 num_eps_samples=64,
                 sample_type="antithetic"):
        """
        Eligibility Network with reward modulated plastic weights
        :param input_size: (int) size of observation space
        :param output_size: (int) size of action space
        :param noise_std: (float) noise standard deviation, forwarded to ESNetwork
        :param prune_noise_std: (float) prune noise standard deviation, forwarded to ESNetwork
        :param action_noise_std: (float or None) standard deviation of the Gaussian
            noise added to the action returned by forward(); None disables it
        :param num_eps_samples: (int) number of epsilon samples (population size)
        :param sample_type: (str) network noise sampling type
        """
        self.params = list()  # list of parameters to update
        self.prune_params = list()  # list of prune parameters handed to ESNetwork
        self.input_size = input_size  # observation space dimensionality
        self.output_size = output_size  # action space dimensionality
        self.action_noise_std = action_noise_std  # action noise standard deviation

        # NOTE(review): "clip" is interpreted by NetworkModule; presumably a
        # bound on weights/activations -- confirm against its implementation.
        recur_ff1_meta = {
            "clip": 2,
            "activation": identity,
            "input_size": input_size,
            "output_size": 16
        }
        self.recur_plastic_ff1 = \
            NetworkModule("linear", recur_ff1_meta)
        self.params.append(self.recur_plastic_ff1)

        recur_ff2_meta = {
            "clip": 2,
            "activation": identity,
            "input_size": 16,
            "output_size": 16
        }
        self.recur_plastic_ff2 = \
            NetworkModule("structural_neuromod_recurrent_eligibility", recur_ff2_meta, save_activations=True)
        self.params.append(self.recur_plastic_ff2)
        # Only the middle module exposes prune parameters.
        self.prune_params.append(self.recur_plastic_ff2.prune_parameters)

        recur_ff3_meta = {
            "clip": 2,
            "activation": identity,
            "input_size": 16,
            "output_size": output_size
        }
        self.recur_plastic_ff3 = \
            NetworkModule("linear", recur_ff3_meta)
        self.params.append(self.recur_plastic_ff3)

        # The base class is initialised last because it receives the fully
        # populated module and prune-parameter lists.
        super(CDPNet, self).__init__(noise_std=noise_std,
                                     prune_params=self.prune_params,
                                     params=self.params,
                                     num_eps_samples=num_eps_samples,
                                     sample_type=sample_type,
                                     prune_noise_std=prune_noise_std)

    def reset(self):
        """
        Reset inter-lifetime network parameters
        :return: None
        """
        for _param in self.params:
            _param.reset()

    def forward(self, x):
        """
        Forward propagate input value
        :param x: (ndarray) state input; this method does not write to it
        :return: (ndarray) post synaptic activity at final layer, with Gaussian
            action noise added when ``action_noise_std`` is not None
        """
        # tanh is applied only after the first module; the other two modules'
        # outputs are passed on unchanged.
        post_synaptic_ff1 = np.tanh(self.recur_plastic_ff1.forward(x))
        post_synaptic_ff2 = self.recur_plastic_ff2.forward(post_synaptic_ff1)
        action = self.recur_plastic_ff3.forward(post_synaptic_ff2)

        if self.action_noise_std is not None:
            # Perturb the action (not the observation) and do it out of place,
            # so neither the caller's input nor any array the module may still
            # reference is mutated.
            action = action + \
                np.random.randn(*np.shape(action)) * self.action_noise_std

        return action
Example #2
0
class CDPNet(ESNetwork):
    """
    Two-module network built from NetworkModule layers:
    "linear" (input_size -> 32) followed by "simple_neuromod_recurrent"
    (32 -> output_size).

    NOTE: an earlier variant with an intermediate layer and a sigmoid-thresholded
    gating pathway was disabled in this version and is not constructed.
    """

    def __init__(self, input_size, output_size, noise_std=0.01,
            action_noise_std=None, num_eps_samples=64, sample_type="antithetic"):
        """
        Eligibility Network with reward modulated plastic weights
        :param input_size: (int) size of observation space
        :param output_size: (int) size of action space
        :param noise_std: (float) noise standard deviation, forwarded to ESNetwork
        :param action_noise_std: (float or None) standard deviation of the Gaussian
            noise added to the action returned by forward(); None disables it
        :param num_eps_samples: (int) number of epsilon samples (population size)
        :param sample_type: (str) network noise sampling type
        """
        self.params = list()  # list of parameters to update
        self.input_size = input_size  # observation space dimensionality
        self.output_size = output_size  # action space dimensionality
        self.action_noise_std = action_noise_std  # action noise standard deviation
        # Kept as a public attribute for compatibility; the modules below are
        # built with explicit type strings and do not read it.
        self.ff_connectivity_type = "linear"

        # NOTE(review): "clip" is interpreted by NetworkModule; presumably a
        # bound on weights/activations -- confirm against its implementation.
        recur_ff1_meta = {
            "clip": 1, "activation": identity, "input_size": input_size, "output_size": 32}
        self.recur_plastic_ff1 = \
            NetworkModule("linear", recur_ff1_meta)
        self.params.append(self.recur_plastic_ff1)

        recur_ff3_meta = {
            "clip": 1, "activation": identity, "input_size": 32, "output_size": output_size}
        self.recur_plastic_ff3 = \
            NetworkModule("simple_neuromod_recurrent", recur_ff3_meta)
        self.params.append(self.recur_plastic_ff3)

        # The base class is initialised last because it receives the fully
        # populated module list.
        super(CDPNet, self).__init__(noise_std=noise_std,
            params=self.params, num_eps_samples=num_eps_samples, sample_type=sample_type)

    def reset(self):
        """
        Reset inter-lifetime network parameters
        :return: None
        """
        for _param in self.params:
            _param.reset()

    def forward(self, x):
        """
        Forward propagate input value
        :param x: (ndarray) state input; this method does not write to it
        :return: (ndarray) post synaptic activity at final layer, with Gaussian
            action noise added when ``action_noise_std`` is not None
        """
        # tanh is applied to the first module's output only; the final
        # module's output is used as the action directly.
        post_synaptic_ff1 = np.tanh(self.recur_plastic_ff1.forward(x))
        action = self.recur_plastic_ff3.forward(post_synaptic_ff1)

        if self.action_noise_std is not None:
            # Perturb the action (not the observation) and do it out of place,
            # so neither the caller's input nor any array the module may still
            # reference is mutated.
            action = action + \
                np.random.randn(*np.shape(action)) * self.action_noise_std

        return action
class CDPNet(ESNetwork):
    """
    Three-module network built from NetworkModule layers:
    "linear" (input_size -> 512), "simple_neuromod" (512 -> 512) and
    "simple_neuromod" (512 -> output_size).
    """

    def __init__(self,
                 input_size,
                 output_size,
                 noise_std=0.01,
                 action_noise_std=None,
                 num_eps_samples=64,
                 sample_type="antithetic"):
        """
        Build the three modules and register them with the ESNetwork base.

        :param input_size: (int) size of observation space
        :param output_size: (int) size of action space
        :param noise_std: (float) noise standard deviation, forwarded to ESNetwork
        :param action_noise_std: (float or None) stored on the instance; not
            read anywhere in this class
        :param num_eps_samples: (int) number of epsilon samples (population size)
        :param sample_type: (str) network noise sampling type
        """
        # Bookkeeping attributes.
        self.input_size = input_size
        self.output_size = output_size
        self.action_noise_std = action_noise_std
        self.ff_connectivity_type = "linear"  # type string used for the first module

        hidden_width = 512  # shared width of the two hidden interfaces

        # Modules are created in first-to-last order.
        self.recur_plastic_ff1 = NetworkModule(
            self.ff_connectivity_type,
            {
                "clip": 1,
                "activation": identity,
                "input_size": input_size,
                "output_size": hidden_width,
            })
        self.recur_plastic_ff2 = NetworkModule(
            "simple_neuromod",
            {
                "clip": 1,
                "activation": identity,
                "input_size": hidden_width,
                "output_size": hidden_width,
            })
        self.recur_plastic_ff3 = NetworkModule(
            "simple_neuromod",
            {
                "clip": 1,
                "activation": identity,
                "input_size": hidden_width,
                "output_size": output_size,
            })

        # Modules to update, in forward order.
        self.params = [
            self.recur_plastic_ff1,
            self.recur_plastic_ff2,
            self.recur_plastic_ff3,
        ]

        super(CDPNet, self).__init__(
            noise_std=noise_std,
            params=self.params,
            num_eps_samples=num_eps_samples,
            sample_type=sample_type)

    def reset(self):
        """
        Call reset() on every registered module.
        :return: None
        """
        for module in self.params:
            module.reset()

    def forward(self, x, act=False):
        """
        Run the input through the three modules in sequence.
        :param x: (ndarray) state input
        :param act: accepted for interface compatibility; not used here
        :return: (ndarray) output of the final module
        """
        # tanh follows the first two modules; the last module's output is
        # returned as-is.
        hidden = np.tanh(self.recur_plastic_ff1.forward(x))
        hidden = np.tanh(self.recur_plastic_ff2.forward(hidden))
        return self.recur_plastic_ff3.forward(hidden)