def test_deferred_init_default_ModulatoryProjection_names(self):
    """Check the default names of ModulatoryProjections built with no arguments.

    For each of LearningProjection, ControlProjection and GatingProjection,
    two instances are constructed without arguments.  The first is expected
    to be named 'Deferred Init <ClassName>' and the second to carry an
    additional '-1' suffix.
    """
    # All six instances are bound to distinct names so that every one of
    # them stays alive until the end of the test.
    learning_first = pnl.LearningProjection()
    learning_second = pnl.LearningProjection()
    assert learning_first.name == 'Deferred Init LearningProjection'
    assert learning_second.name == 'Deferred Init LearningProjection-1'

    control_first = pnl.ControlProjection()
    control_second = pnl.ControlProjection()
    assert control_first.name == 'Deferred Init ControlProjection'
    assert control_second.name == 'Deferred Init ControlProjection-1'

    gating_first = pnl.GatingProjection()
    gating_second = pnl.GatingProjection()
    assert gating_first.name == 'Deferred Init GatingProjection'
    assert gating_second.name == 'Deferred Init GatingProjection-1'
# Reinforcement-learning example (script fragment): a three-element input
# layer projects to a three-element SoftMax "Action Selection" layer, and the
# Process connecting them is given a LearningProjection that uses the
# Reinforcement learning function.
import psyneulink as pnl
import psyneulink.core.components.functions.learningfunctions
import psyneulink.core.components.functions.transferfunctions

# Input mechanism with three elements.
input_layer = pnl.TransferMechanism(size=3, name='Input Layer')

# Three-element mechanism whose function is SoftMax, configured with
# output=pnl.PROB and a gain of 1.0.
action_selection = pnl.TransferMechanism(
    size=3,
    function=psyneulink.core.components.functions.transferfunctions.SoftMax(
        output=pnl.PROB, gain=1.0),
    name='Action Selection')

# Process running input_layer -> action_selection.  Learning is specified
# with a LearningProjection whose learning function is Reinforcement
# (learning_rate=0.05); the Process target is 0.
p = pnl.Process(default_variable=[0, 0, 0],
                pathway=[input_layer, action_selection],
                learning=pnl.LearningProjection(
                    learning_function=psyneulink.core.components.functions.
                    learningfunctions.Reinforcement(learning_rate=0.05)),
                target=0)

# Show the matrix of the first path afferent of action_selection's input port
# and the matrix of the first efferent of its output port.
print('reward prediction weights: \n',
      action_selection.input_port.path_afferents[0].matrix)
print('target_mechanism weights: \n',
      action_selection.output_port.efferents[0].matrix)

# Action labels and the reward value listed for each, index-aligned.
actions = ['left', 'middle', 'right']
reward_values = [10, 0, 0]
first_reward = 0

# Must initialize reward (won't be used, but needed for declaration of lambda function)
# NOTE(review): the lambda/reward function mentioned here is not visible in
# this fragment -- presumably it is defined just below; confirm.
action_selection.output_port.value = [0, 0, 1]
# Get reward value for selected action
name='Input Layer' ) action_selection = pnl.TransferMechanism( size=3, function=pnl.SoftMax( output=pnl.PROB, gain=1.0 ), name='Action Selection' ) p = pnl.Process( default_variable=[0, 0, 0], pathway=[input_layer, action_selection], learning=pnl.LearningProjection(learning_function=pnl.Reinforcement(learning_rate=0.05)), target=0 ) print('reward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix) print('target_mechanism weights: \n', action_selection.output_state.efferents[0].matrix) actions = ['left', 'middle', 'right'] reward_values = [10, 0, 0] first_reward = 0 # Must initialize reward (won't be used, but needed for declaration of lambda function) action_selection.output_state.value = [0, 0, 1] # Get reward value for selected action)
def model_training_response_extinction():
    """
    Create the plot for Figure 5C in the Montague paper.

    Figure 5C shows 'extinction of response to the sensory cue.'  A 60-element
    sample mechanism feeds a 60-element linear 'Action Selection' mechanism
    through a zero-initialised 60x60 mapping projection that is trained with
    TDLearning (learning_rate=0.3).  The system is run for 150 trials; after
    every trial the value of ``s.mechanisms[2]`` (presumably the TD error
    signal -- confirm) is stored, and columns 40-59 of the stored values are
    drawn as a 3D surface over trials.

    The sample is 1 from timestep 42 onward on every trial.  The target is 1
    at timestep 54 on trial indices 0-70 and 0 everywhere from trial index 71
    on.

    NOTE(review): the figure description says reward delivery stops at trial
    70, while the code clears rows starting at index 71 -- confirm the
    intended boundary.
    NOTE(review): ``model_training`` uses a stimulus onset of 41; confirm that
    42 is intended here.
    """
    n_trials = 150
    n_steps = 60
    stimulus_onset = 42
    reward_delivery = 54
    first_unrewarded_trial = 71
    first_plotted_step = 40

    # Mechanisms (constructed in the same order as before: sample first).
    sample_mech = pnl.TransferMechanism(default_variable=np.zeros(n_steps),
                                        name=pnl.SAMPLE)
    linear_fn = pnl.Linear(slope=1.0, intercept=1.0)
    action_mech = pnl.TransferMechanism(default_variable=np.zeros(n_steps),
                                        function=linear_fn,
                                        name='Action Selection')

    # One stimulus row per trial: 0 before the onset, 1 from the onset on.
    stimulus_row = np.zeros(n_steps)
    stimulus_row[stimulus_onset:] = 1
    samples = np.tile(stimulus_row, (n_trials, 1))

    # One target row per trial: a single 1 at the reward timestep ...
    reward_row = np.zeros(n_steps)
    reward_row[reward_delivery] = 1
    targets = np.tile(reward_row, (n_trials, 1))
    # ... which is cleared on every trial from index 71 onward
    # (stop delivering reward after trial 70).
    targets[first_unrewarded_trial:, reward_delivery] = 0

    # Zero-initialised weights between the two mechanisms.
    pnl.MappingProjection(sender=sample_mech,
                          receiver=action_mech,
                          matrix=np.zeros((n_steps, n_steps)))

    td_projection = pnl.LearningProjection(
        learning_function=pnl.TDLearning(learning_rate=0.3))

    process = pnl.Process(default_variable=np.zeros(n_steps),
                          pathway=[sample_mech, action_mech],
                          learning=td_projection,
                          size=n_steps,
                          target=np.zeros(n_steps))

    system = pnl.System(processes=[process])

    # Per-trial storage filled in by the after-trial callback.
    delta_vals = np.zeros((n_trials, n_steps))
    trial = 0

    def announce_episode():
        print("\n\n*** EPISODE: {}".format(trial))

    def record_deltas():
        nonlocal trial
        delta_vals[trial] = system.mechanisms[2].value
        trial += 1

    system.run(num_trials=n_trials,
               inputs={sample_mech: samples},
               targets={action_mech: targets},
               learning=True,
               call_before_trial=announce_episode,
               call_after_trial=record_deltas)

    with plt.style.context('seaborn'):
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        trial_grid, step_grid = np.meshgrid(
            np.arange(n_trials), np.arange(first_plotted_step, n_steps))
        ax.plot_surface(trial_grid, step_grid,
                        delta_vals[:, first_plotted_step:n_steps].T)
        ax.invert_yaxis()
        ax.set_xlabel("Trial")
        ax.set_ylabel("Timestep")
        ax.set_zlabel("∂")
        ax.set_title("Montague et. al. (1996) -- Figure 5C")
        plt.show()
def model_training_full_experiment():
    """
    Create the plot for Figure 5B in the Montague paper.

    Figure 5B shows the 'entire time course of model responses (trials
    1-150).'  A 60-element sample mechanism feeds a 60-element linear 'Action
    Selection' mechanism through a zero-initialised 60x60 mapping projection
    trained with TDLearning (learning_rate=0.3).  After every trial the value
    of ``s.mechanisms[2]`` (presumably the TD error signal -- confirm) is
    stored, and columns 40-59 of the stored values are drawn as a 3D surface
    over trials.

    The sample is 1 from timestep 41 onward on every trial.  The target is 1
    at timestep 54, except on trial indices 0-9 (no reward before training
    begins) and on indices 14, 29, 44, 59, 74, 89, 104 and 119 (no reward
    every 15 trials to simulate a wrong response).

    NOTE(review): the figure caption quoted above mentions trials 1-150, but
    this function runs 120 trials -- confirm which is intended.
    """
    n_trials = 120
    n_steps = 60
    stimulus_onset = 41
    reward_delivery = 54
    first_plotted_step = 40

    # Mechanisms (constructed in the same order as before: sample first).
    sample_mech = pnl.TransferMechanism(default_variable=np.zeros(n_steps),
                                        name=pnl.SAMPLE)
    linear_fn = pnl.Linear(slope=1.0, intercept=1.0)
    action_mech = pnl.TransferMechanism(default_variable=np.zeros(n_steps),
                                        function=linear_fn,
                                        name='Action Selection')

    # One stimulus row per trial: 0 before the onset, 1 from the onset on.
    stimulus_row = np.zeros(n_steps)
    stimulus_row[stimulus_onset:] = 1
    samples = np.tile(stimulus_row, (n_trials, 1))

    # One target row per trial: a single 1 at the reward timestep.
    reward_row = np.zeros(n_steps)
    reward_row[reward_delivery] = 1
    targets = np.tile(reward_row, (n_trials, 1))

    # training begins at trial 11
    # no reward given every 15 trials to simulate a wrong response
    no_reward_trials = list(range(10)) + list(range(14, n_trials, 15))
    targets[no_reward_trials, reward_delivery] = 0

    # Zero-initialised weights between the two mechanisms.
    pnl.MappingProjection(sender=sample_mech,
                          receiver=action_mech,
                          matrix=np.zeros((n_steps, n_steps)))

    td_projection = pnl.LearningProjection(
        learning_function=pnl.TDLearning(learning_rate=0.3))

    process = pnl.Process(default_variable=np.zeros(n_steps),
                          pathway=[sample_mech, action_mech],
                          learning=td_projection,
                          size=n_steps,
                          target=np.zeros(n_steps))

    system = pnl.System(processes=[process])

    # Per-trial storage filled in by the after-trial callback.
    delta_vals = np.zeros((n_trials, n_steps))
    trial = 0

    def announce_episode():
        print("\n\n*** EPISODE: {}".format(trial))

    def record_deltas():
        nonlocal trial
        delta_vals[trial] = system.mechanisms[2].value
        trial += 1

    system.run(num_trials=n_trials,
               inputs={sample_mech: samples},
               targets={action_mech: targets},
               learning=True,
               call_before_trial=announce_episode,
               call_after_trial=record_deltas)

    with plt.style.context('seaborn'):
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        trial_grid, step_grid = np.meshgrid(
            np.arange(n_trials), np.arange(first_plotted_step, n_steps))
        ax.plot_surface(trial_grid, step_grid,
                        delta_vals[:, first_plotted_step:n_steps].T)
        ax.invert_yaxis()
        ax.set_xlabel("Trial")
        ax.set_ylabel("Timestep")
        ax.set_zlabel("∂")
        ax.set_title("Montague et. al. (1996) -- Figure 5B")
        plt.show()
def model_training():
    """
    Create the plot for Figure 5A in the Montague paper.

    Figure 5A is a 'plot of ∂(t) over time for three trials during training
    (1, 30, and 50).'  A 60-element sample mechanism feeds a 60-element linear
    'Action Selection' mechanism (slope 1.0, intercept 0.01) through a
    zero-initialised 60x60 mapping projection trained with TDLearning
    (learning_rate=0.3).  The system is run for 120 trials; after every trial
    the value of ``s.mechanisms[2]`` (presumably the TD error signal --
    confirm) is stored and printed, and the rows stored for trial indices 0,
    29 and 49 are plotted against timestep.

    The sample is 1 from timestep 41 onward on every trial.  The target is 1
    at timestep 54, except on trial indices 14, 29, 44, 59, 74 and 89, where
    no reward is given to simulate a wrong response.
    """
    n_trials = 120
    n_steps = 60
    stimulus_onset = 41
    reward_delivery = 54

    # Mechanisms (constructed in the same order as before: sample first).
    sample = pnl.TransferMechanism(default_variable=np.zeros(n_steps),
                                   name=pnl.SAMPLE)
    action_selection = pnl.TransferMechanism(
        default_variable=np.zeros(n_steps),
        function=pnl.Linear(slope=1.0, intercept=0.01),
        name='Action Selection')

    # One stimulus row per trial: 0 before the onset, 1 from the onset on.
    samples = np.zeros(n_steps)
    samples[stimulus_onset:] = 1
    samples = np.tile(samples, (n_trials, 1))

    # One target row per trial: a single 1 at the reward timestep.
    targets = np.zeros(n_steps)
    targets[reward_delivery] = 1
    targets = np.tile(targets, (n_trials, 1))

    # no reward given every 15 trials to simulate a wrong response
    # (trial indices 14, 29, 44, 59, 74, 89)
    targets[np.arange(14, 90, 15), reward_delivery] = 0

    # Zero-initialised weights between the two mechanisms.
    pnl.MappingProjection(sender=sample,
                          receiver=action_selection,
                          matrix=np.full((n_steps, n_steps), 0.0))

    learning_projection = pnl.LearningProjection(
        learning_function=pnl.TDLearning(learning_rate=0.3))

    p = pnl.Process(default_variable=np.zeros(n_steps),
                    pathway=[sample, action_selection],
                    learning=learning_projection,
                    size=n_steps,
                    target=np.zeros(n_steps))

    trial = 0

    def print_header():
        nonlocal trial
        print("\n\n*** EPISODE: {}".format(trial))

    def store_delta_vals():
        nonlocal trial
        delta_vals[trial] = s.mechanisms[2].value
        trial += 1

        print('Delta values: \n{0}'.format(s.mechanisms[2].value))

    input_list = {sample: samples}
    target_list = {action_selection: targets}

    s = pnl.System(processes=[p])

    # Per-trial storage filled in by store_delta_vals.
    delta_vals = np.zeros((n_trials, n_steps))

    s.run(num_trials=n_trials,
          inputs=input_list,
          targets=target_list,
          learning=True,
          call_before_trial=print_header,
          call_after_trial=store_delta_vals)

    # (row index into delta_vals, line/marker format, legend label)
    plotted_trials = (
        (0, "-o", "Trial 1"),
        (29, "-s", "Trial 30"),
        (49, "-o", "Trial 50"),
    )

    with plt.style.context('seaborn'):
        for row, fmt, label in plotted_trials:
            plt.plot(delta_vals[row], fmt, label=label)
        # The title literal must stay on one physical line; it had been
        # broken in two, which leaves the string unterminated.
        plt.title("Montague et. al. (1996) -- Figure 5A")
        plt.xlabel("Timestep")
        plt.ylabel("∂")
        plt.legend()
        plt.xlim(xmin=35)
        plt.xticks()
        plt.show()
# function=pnl.Stability(metric=pnl.ENERGY, # normalize=True), # name='K') conflicts = pnl.IntegratorMechanism(input_states=[action_selection.output_states[2]], function=psyneulink.core.components.functions.statefulfunctions.integratorfunctions.DualAdaptiveIntegrator(short_term_gain=6.0, long_term_gain=6.0, short_term_rate=0.05, long_term_rate=0.2), name='Short- and Long-term conflict') decision_process = pnl.Process(default_variable=[0, 0], pathway=[input_layer, action_selection], learning=pnl.LearningProjection(learning_function=psyneulink.core.components.functions .learningfunctions.Reinforcement( learning_rate=0.03)), # if learning rate set to .3 output state values annealing to [0., 0.] # which leads to error in reward function target=0 ) print('reward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix) print('target_mechanism weights: \n', action_selection.output_state.efferents[0].matrix) conflict_process = pnl.Process(pathway=[action_selection, conflicts]) LC_NE = pnl.LCControlMechanism(objective_mechanism=pnl.ObjectiveMechanism(monitored_output_states=[action_selection], name='LC-NE ObjectiveMech'), modulated_mechanisms=[action_selection], integration_method='EULER', initial_w_FitzHughNagumo=initial_u,
#but we do need to specify the size, which will be the size of our input array. input_layer = pnl.TransferMechanism(size=(3), name='INPUT LAYER') #Next, we specify our output layer. This is where we do our sigmoid transformation, by simply applying the Logistic function. #The size we specify for this layer is the number of output nodes we want. In this case, we want the network to return a scalar #for each example (either a 1 or a zero), so our size is 1 output_layer = pnl.TransferMechanism(size=1, function=pnl.Logistic, name='OUTPUT LAYER') #Now, we put them together into a process. #Notice, that we did not need to specify a weighting matrix. One will automatically be generated by psyneulink when we create our #process. # JDC ADDED: # Normally, for learning to occur in a process, we would just specify that learning=pnl.ENABLED. # However, if we want to specify a specific learning function or error_function to be used, then we must # specify it by construction a default LearningProjection and giving it the parameters we want. In this # case it is the error_function, that we will set to CROSS_ENTROPY (using PsyNeulink's Distance Function): net2l = pnl.Process(pathway=[input_layer, output_layer], learning=pnl.LearningProjection( error_function=pnl.Distance(metric=pnl.CROSS_ENTROPY))) #The pathway argument specifies in which order to execute the layers. THis way, the output of one will be mapped to the input of #the next. #To run the process, we will put it into a system. sys2l = pnl.System(processes=[net2l], learning_rate=4) sys2l.show_graph(show_learning=pnl.ALL)
# normalize=True), # name='K') conflicts = pnl.IntegratorMechanism( input_states=[action_selection.output_states[2]], function=pnl.AGTUtilityIntegrator(short_term_gain=6.0, long_term_gain=6.0, short_term_rate=0.05, long_term_rate=0.2), name='Short- and Long-term conflict') decision_process = pnl.Process( default_variable=[0, 0], pathway=[input_layer, action_selection], learning=pnl.LearningProjection( learning_function=pnl.Reinforcement(learning_rate=0.03) ), # if learning rate set to .3 output state values annealing to [0., 0.] # which leads to error in reward function target=0) print('reward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix) print('target_mechanism weights: \n', action_selection.output_state.efferents[0].matrix) conflict_process = pnl.Process(pathway=[action_selection, conflicts]) LC_NE = pnl.LCControlMechanism(objective_mechanism=pnl.ObjectiveMechanism( monitored_output_states=[action_selection], name='LC-NE ObjectiveMech'), modulated_mechanisms=[action_selection], integration_method='EULER',
#but we do need to specify the size, which will be the size of our input array. input_layer=pnl.TransferMechanism(size=(3), name='INPUT LAYER') #Next, we specify our output layer. This is where we do our sigmoid transformation, by simply applying the Logistic function. #The size we specify for this layer is the number of output nodes we want. In this case, we want the network to return a scalar #for each example (either a 1 or a zero), so our size is 1 output_layer=pnl.TransferMechanism(size=1, function=psyneulink.core.components.functions.transferfunctions.Logistic, name='OUTPUT LAYER') #Now, we put them together into a process. #Notice, that we did not need to specify a weighting matrix. One will automatically be generated by psyneulink when we create our #process. # JDC ADDED: # Normally, for learning to occur in a process, we would just specify that learning=pnl.ENABLED. # However, if we want to specify a specific learning function or error_function to be used, then we must # specify it by construction a default LearningProjection and giving it the parameters we want. In this # case it is the error_function, that we will set to CROSS_ENTROPY (using PsyNeulink's Distance Function): net2l=pnl.Process(pathway=[input_layer,output_layer], learning=pnl.LearningProjection(error_function=psyneulink.core.components.functions .objectivefunctions.Distance(metric=pnl.CROSS_ENTROPY)) ) #The pathway argument specifies in which order to execute the layers. THis way, the output of one will be mapped to the input of #the next. #To run the process, we will put it into a system. sys2l=pnl.System(processes=[net2l],learning_rate=4) sys2l.show_graph(show_learning=pnl.ALL)