# Example 1
def test_reinforcement_fixed_targets():
    """Reinforcement learning with a fixed (list-valued) target.

    Builds a System with one Process in which a LearningProjection
    (Reinforcement, learning_rate=0.05) trains the projection from a
    2-element input layer to a DDM action-selection mechanism, runs two
    trials with identical inputs and fixed targets, and checks the DDM's
    resulting value.
    """
    input_layer = TransferMechanism(
        size=2,
        name='Input Layer',
    )

    action_selection = pnl.DDM(input_format=pnl.ARRAY,
                               function=pnl.DriftDiffusionAnalytical(),
                               output_states=[pnl.SELECTED_INPUT_ARRAY],
                               name='DDM')

    p = Process(pathway=[input_layer, action_selection],
                learning=LearningProjection(learning_function=Reinforcement(
                    learning_rate=0.05)))

    # Two trials of identical input; `targets` supplies the reward signal
    # consumed by the Reinforcement learning function.
    input_list = {input_layer: [[1, 1], [1, 1]]}
    s = System(processes=[p],
               # learning_rate=0.05,
               )
    targets = [[10.], [10.]]

    results = s.run(inputs=input_list, targets=targets)

    # Use assert_allclose (consistent with the other tests in this file) so a
    # failure reports which elements differ instead of a bare `assert False`.
    # rtol/atol are set to np.allclose's defaults to keep strictness identical.
    np.testing.assert_allclose(
        action_selection.value,
        [[1.], [2.30401336], [0.97340301], [0.02659699], [2.30401336],
         [2.08614798], [1.85006765], [2.30401336], [2.08614798], [1.85006765]],
        rtol=1e-5, atol=1e-8)
# Example 2
    def _instantiate_learning_mechanism(self,
                                        learning_function,
                                        learning_rate,
                                        learned_projection,
                                        context=None):
        """Create and wire a KohonenLearningMechanism for this Mechanism.

        Instantiates a KohonenLearningMechanism whose default_variable is the
        (sender value, receiver value) pair of ``self.learned_projection``,
        then constructs three Projections connecting it into the network,
        and returns the new LearningMechanism.

        NOTE(review): the ``learned_projection`` and ``context`` parameters
        are accepted but never read here — the body uses
        ``self.learned_projection`` instead; confirm whether the argument
        was intended to be used.
        """

        learning_mechanism = KohonenLearningMechanism(
            default_variable=[
                self.learned_projection.sender.value,
                self.learned_projection.receiver.value
            ],
            matrix=self.matrix,
            function=learning_function,
            learning_rate=learning_rate,
            # learning_signals=[self.matrix],
            name="{} for {}".format(LearningMechanism.className, self.name))

        # KDM 10/22/18: should below be aux_components?
        # FIX: 10/31/19 [JDC]: YES!

        # Instantiate Projection from learned_projection's sender to the
        # LearningMechanism's ACTIVATION_INPUT InputPort.
        MappingProjection(
            sender=self.learned_projection.sender,
            receiver=learning_mechanism.input_ports[ACTIVATION_INPUT],
            matrix=IDENTITY_MATRIX,
            name="Error Projection for {}".format(learning_mechanism.name))

        # Instantiate Projection from the Mechanism's INPUT_PATTERN OutputPort
        #    (which has the value of the learned_projection's receiver;  i.e., the Mechanism's input)
        #    to the LearningMechanism's ACTIVATION_OUTPUT InputPort.
        MappingProjection(
            sender=self.output_ports[INPUT_PATTERN],
            receiver=learning_mechanism.input_ports[ACTIVATION_OUTPUT],
            matrix=IDENTITY_MATRIX,
            name="Error Projection for {}".format(learning_mechanism.name))

        # Instantiate Projection from the LearningMechanism's LEARNING_SIGNAL
        # OutputPort to the matrix of the learned_projection, closing the loop.
        LearningProjection(
            sender=learning_mechanism.output_ports[LEARNING_SIGNAL],
            receiver=self.matrix,
            name="{} for {}".format(LearningProjection.className,
                                    self.learned_projection.name))

        return learning_mechanism
# Princeton University licenses this file to You under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.  You may obtain a copy of the License at:
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

# **********************************************  MappingProjection ****************************************************
"""

Contents
--------
  * `MappingProjection_Overview`
  * `MappingProjection_Creation`
      - `MappingProjection_Matrix_Specification`
      - `MappingProjection_Learning_Specification`
      - `MappingProjection_Deferred_Initialization`
  * `MappingProjection_Structure`
      - `MappingProjection_Sender`
      - `MappingProjection_Receiver`
  * `MappingProjection_Execution`
      - `MappingProjection_Learning`
  * `MappingProjection_Class_Reference`


.. _MappingProjection_Overview:

Overview
--------

A MappingProjection transmits the `value <OutputPort.value>` of an `OutputPort` of one `ProcessingMechanism
# Example 4
def test_reinforcement():
    """End-to-end reinforcement-learning test with a callable reward.

    Trains the projection from a 3-element input layer to a probabilistic
    SoftMax action-selection mechanism for 10 trials, where the reward is
    supplied by a lambda keyed on the action actually selected each trial.
    Checks mechanism outputs, the learned weight matrix, and per-trial
    results against hard-coded expected values.
    """
    input_layer = TransferMechanism(
        default_variable=[0, 0, 0],
        name='Input Layer',
    )

    action_selection = TransferMechanism(
        default_variable=[0, 0, 0],
        function=SoftMax(
            output=PROB,
            gain=1.0,
        ),
        name='Action Selection',
    )

    p = Process(
        default_variable=[0, 0, 0],
        size=3,
        pathway=[input_layer, action_selection],
        learning=LearningProjection(learning_function=Reinforcement(
            learning_rate=0.05)),
        target=0,
    )

    reward_values = [10, 10, 10]

    # Must initialize reward (won't be used, but needed for declaration of lambda function)
    action_selection.output_state.value = [0, 0, 1]

    # Reward for the selected action: the PROB SoftMax output has a single
    # nonzero entry, whose index identifies the chosen action.
    reward = lambda: [
        reward_values[int(np.nonzero(action_selection.output_state.value)[0])]
    ]

    def print_header(system):
        # Trial banner printed before each trial via call_before_trial.
        print("\n\n**** TRIAL: ",
              system.scheduler_processing.clock.simple_time)

    def show_weights():
        # Diagnostic printout of the learned matrix and the selected action,
        # run after every trial via call_after_trial.
        print(
            'Reward prediction weights: \n',
            action_selection.input_states[0].path_afferents[0].get_mod_matrix(
                s))
        print('\nAction selected:  {}; predicted reward: {}'.format(
            np.nonzero(action_selection.output_state.value)[0][0],
            action_selection.output_state.value[np.nonzero(
                action_selection.output_state.value)[0][0]],
        ))

    input_list = {input_layer: [[1, 1, 1]]}

    s = System(
        processes=[p],
        # learning_rate=0.05,
        targets=[0],
    )

    results = s.run(
        num_trials=10,
        inputs=input_list,
        targets=reward,
        call_before_trial=functools.partial(print_header, s),
        call_after_trial=show_weights,
    )

    # Mechanisms created automatically by the learning pathway, located by
    # their position in the System's mechanism list.
    mech_objective_action = s.mechanisms[2]
    mech_learning_input_to_action = s.mechanisms[3]

    reward_prediction_weights = action_selection.input_states[
        0].path_afferents[0]

    expected_output = [
        (input_layer.get_output_values(s), [np.array([1., 1., 1.])]),
        (action_selection.get_output_values(s),
         [np.array([0., 3.71496434, 0.])]),
        (pytest.helpers.expand_np_ndarray(
            mech_objective_action.get_output_values(s)),
         pytest.helpers.expand_np_ndarray(
             [np.array([6.28503566484375]),
              np.array(39.50167330835792)])),
        (pytest.helpers.expand_np_ndarray(
            mech_learning_input_to_action.get_output_values(s)),
         pytest.helpers.expand_np_ndarray([[
             np.array([0., 0.31425178324218755, 0.]),
             np.array([0., 0.31425178324218755, 0.])
         ]])),
        (reward_prediction_weights.get_mod_matrix(s),
         np.array([
             [1., 0., 0.],
             [0., 4.02921612, 0.],
             [0., 0., 1.8775],
         ])),
        (results, [
            [np.array([0., 1., 0.])],
            [np.array([0., 1.45, 0.])],
            [np.array([0., 0., 1.])],
            [np.array([0., 1.8775, 0.])],
            [np.array([0., 2.283625, 0.])],
            [np.array([0., 2.66944375, 0.])],
            [np.array([0., 0., 1.45])],
            [np.array([0., 3.03597156, 0.])],
            [np.array([0., 3.38417298, 0.])],
            [np.array([0., 3.71496434, 0.])],
        ]),
    ]

    # Unpack directly in the for-target; the index only feeds the error message.
    for i, (val, expected) in enumerate(expected_output):
        np.testing.assert_allclose(
            val, expected, err_msg='Failed on expected_output[{0}]'.format(i))