def test_seq_pred(self, N_test, d, verb=True):

    from TASKS.task_seq_prediction import construct_test_trial, get_dictionary

    zero = np.zeros((1, self.S))
    corr = 0.0
    dic_stim, dic_resp = get_dictionary(d)

    for tr in np.arange(N_test):

        if verb:
            print('TRIAL N.', tr + 1, ':\t', end="")
        # self.reset_memory()
        # self.reset_tags()
        S, O = construct_test_trial(d, 0.5)
        o_print = dic_resp[np.argmax(O)]
        s_old = zero

        # greedy selection and no learning during testing
        self.epsilon = 0
        self.beta = 0

        for i in np.arange(np.shape(S)[0]):

            s_inst = S[i:(i + 1), :]
            s_trans = self.define_transient(s_inst, s_old)
            s_old = s_inst
            s_print = dic_stim[np.argmax(s_inst)]

            y_r, y_m, Q = self.feedforward(s_inst, s_trans)
            resp_ind, _ = self.compute_response(Q, 'greedy')
            q = Q[0, resp_ind]
            z = np.zeros(np.shape(Q))
            z[0, resp_ind] = 1
            r_print = dic_resp[resp_ind]

            if verb:
                print(s_print, end="-")
            q_old = q

            # only the response at the last step of the sequence is scored
            if i == np.shape(S)[0] - 1:
                if verb:
                    print(dic_resp[resp_ind], '\n')
                if dic_resp[resp_ind] == o_print:
                    corr += 1

    perc = 100 * float(corr / N_test)

    return perc
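# Hypothetical usage (a sketch only: `net` and its construction are assumptions,
# since the AuGMEnT class definition is not shown in this excerpt):
#
#     perc = net.test_seq_pred(N_test=1000, d=5, verb=False)
#     print('correct test trials: %.1f%%' % perc)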
import numpy as np

from TASKS.task_seq_prediction import subset_construction, get_dictionary

task = 'seq_prediction'

N_train = 5000
d = 5

## build training dataset
# S_train, O_train = subset_construction(N_train, d)
## or load it
S_train = np.loadtxt('DATA/seq_pred_dataset_d5_t5000_inp.txt')
O_train = np.loadtxt('DATA/seq_pred_dataset_d5_t5000_out.txt')
S_train = np.reshape(S_train, (N_train, -1, d + 2))

dic_stim, dic_resp = get_dictionary(d)

## CONSTRUCTION OF THE AuGMEnT NETWORK
S = d + 2   # dimension of the input = number of possible stimuli
R = 3       # dimension of the regular units
M = 8       # dimension of the memory units
A = 2       # dimension of the activity units = number of possible responses

# value parameters were taken from the original AuGMEnT paper
lamb = 0.2                   # synaptic tag decay
beta = 0.15                  # weight update coefficient
discount = 0.9               # discount rate for future rewards
alpha = 1 - lamb * discount  # synaptic permanence
eps = 0.025                  # percentage of softmax modality for activity selection
g = 1
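# A toy illustration (made-up shapes, not from the source) of what the reshape
# above produces: np.loadtxt returns a 2-D array of stacked time steps, and the
# reshape groups them back into trials of equal length with d + 2 input units.
import numpy as np

toy_steps = np.zeros((10 * 3, 5 + 2))             # 10 trials x 3 steps, d = 5
toy_trials = np.reshape(toy_steps, (10, -1, 5 + 2))
print(toy_trials.shape)                           # (10, 3, 7)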
def training_seq_pred_CPT(self, N_trial, d, stop=True, verb=True, policy='eps_greedy', stoc='soft', t_weighted=True, e_weighted=True):
    # NOTE: the policy arguments were missing from the original signature even
    # though the body uses them; the defaults here mirror training_seq_pred
    # (e_weighted=True is an assumption)

    from TASKS.task_seq_prediction_CPT import construct_trial, get_dictionary

    zero = np.zeros((1, self.S))
    corr = 0
    conv_tr = 0
    E = np.zeros(N_trial)
    dic_stim, dic_resp = get_dictionary(d)
    convergence = False

    for tr in np.arange(N_trial):

        if verb:
            print('TRIAL N.', tr + 1, ':\t', end="")
        first = True
        # self.reset_memory()
        # self.reset_tags()
        S, O = construct_trial(d, 0.5)
        s_old = zero

        for i in np.arange(np.shape(S)[0]):

            s_inst = S[i:(i + 1), :]
            s_trans = self.define_transient(s_inst, s_old)
            s_old = s_inst
            s_print = dic_stim[np.argmax(s_inst)]
            # print(self.sTRACE)
            o = O[i:(i + 1), :]
            o_print = dic_resp[np.argmax(o)]

            y_r, y_m, Q = self.feedforward(s_inst, s_trans)
            resp_ind, P_vec = self.compute_response(Q, policy, stoc, t_weighted, e_weighted)
            q = Q[0, resp_ind]
            z = np.zeros(np.shape(Q))
            z[0, resp_ind] = 1
            r_print = dic_resp[resp_ind]

            if verb:
                if i == 0:
                    print(s_print, end="-")
                    print(r_print, end="-")
                elif i == np.shape(S)[0] - 1:
                    print(r_print, '\t(', corr, ')')
                else:
                    print(r_print, end="-")

            if first != True:
                RPE = (r + self.discount * q) - q_old   # bootstrapped Reward Prediction Error
                self.update_weights(RPE)
            else:
                first = False
            self.update_tags(s_inst, s_trans, y_r, y_m, z, resp_ind)
            q_old = q

            if r_print == o_print:
                # the reward depends on whether the target is one of the two final responses
                final = (o_print == dic_resp[self.A - 2] or o_print == dic_resp[self.A - 1])
                r = self.positive_reward_seq_pred(final, self.A - 2)
                corr += 1
            else:
                r = self.rew_neg
                E[tr] = 1
                corr = 0
            # print('\t', s_print, ' - ', r_print, ' (', o_print, ')', '--->', r)

        RPE = r - q_old   # terminal Reward Prediction Error (no bootstrap)
        self.update_weights(RPE)

        if corr >= 100 and convergence == False:
            conv_tr = tr
            convergence = True
            if stop == True:
                break

    if conv_tr != 0:
        print('SIMULATION CONVERGED AT TRIAL ', conv_tr)

    return E, conv_tr
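# Made-up numeric illustration of the two prediction errors used above: a
# bootstrapped SARSA-style RPE within the trial, and a terminal RPE with no
# bootstrap once the trial ends (all values here are invented for clarity).
discount = 0.9
q_old, q, r = 0.3, 0.5, 0.0
rpe_within = (r + discount * q) - q_old   # 0.9 * 0.5 - 0.3 = 0.15
q_old, r = 0.5, 1.0
rpe_terminal = r - q_old                  # 1.0 - 0.5 = 0.5
print(rpe_within, rpe_terminal)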
def training_seq_pred(self, N_trial, d, stop=True, verb=True, policy='eps_greedy', stoc='soft', t_weighted=True):

    from TASKS.task_seq_prediction import construct_trial, get_dictionary

    zero = np.zeros((1, self.S))
    corr = 0
    conv_tr = 0
    E = np.zeros(N_trial)
    dic_stim, dic_resp = get_dictionary(d)
    convergence = False

    for tr in np.arange(N_trial):

        if verb:
            print('TRIAL N.', tr + 1, ':\t', end="")
        # self.reset_memory()
        # self.reset_tags()
        S, O = construct_trial(d, 0.5)
        o_print = dic_resp[np.argmax(O)]
        s_old = zero

        for i in np.arange(np.shape(S)[0]):

            s_inst = S[i:(i + 1), :]
            s_trans = self.define_transient(s_inst, s_old)
            s_old = s_inst
            s_print = dic_stim[np.argmax(s_inst)]

            y_r, y_m, Q = self.feedforward(s_inst, s_trans)
            resp_ind, P_vec = self.compute_response(Q, policy, stoc, t_weighted)
            q = Q[0, resp_ind]
            z = np.zeros(np.shape(Q))
            z[0, resp_ind] = 1
            r_print = dic_resp[resp_ind]

            if verb:
                print(s_print, end="-")

            self.update_tags(s_inst, s_trans, y_r, y_m, z, resp_ind)
            q_old = q

            # only the prediction at the last step of the sequence is rewarded
            if i == np.shape(S)[0] - 1:
                if dic_resp[resp_ind] == o_print:
                    r = self.rew_pos
                    corr += 1
                else:
                    r = self.rew_neg
                    E[tr] = 1
                    corr = 0
            else:
                r = 0

            RPE = r - q_old   # Reward Prediction Error
            self.update_weights(RPE)

        if verb:
            print(dic_resp[resp_ind], '\t(', corr, ')')

        if corr >= 100 and convergence == False:
            conv_tr = tr
            convergence = True
            if stop == True:
                break

    if conv_tr != 0:
        print('SIMULATION CONVERGED AT TRIAL ', conv_tr)

    return E, conv_tr
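# Self-contained sketch of the kind of action selection compute_response is
# asked for above ('eps_greedy' over the Q-row). This is an assumed
# re-implementation for illustration only, not the network's actual method.
import numpy as np

def eps_greedy(Q, eps=0.025, rng=np.random.default_rng(0)):
    # Q has shape (1, A): exploit the argmax with probability 1 - eps,
    # otherwise pick a uniformly random action index
    if rng.random() >= eps:
        return int(np.argmax(Q[0]))
    return int(rng.integers(Q.shape[1]))

print(eps_greedy(np.array([[0.2, 0.7]])))   # almost always 1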
M = 4

# memory-unit labels: all cumulative (C), all leaky (L), and a half/half mix
mem_vec_c = []
for i in range(M):
    mem_vec_c.append('M' + str(i + 1) + '-C')

mem_vec_l = []
for i in range(M):
    mem_vec_l.append('M' + str(i + 1) + '-L')

mem_vec_h = []
for i in range(int(M / 2)):
    mem_vec_h.append('M' + str(i + 1) + '-L')
for i in range(int(M / 2)):
    mem_vec_h.append('M' + str(i + 1) + '-C')

from TASKS.task_seq_prediction import get_dictionary

dic_stim3, _ = get_dictionary(3)
dic_stim8, _ = get_dictionary(8)

# cue labels: one '+' and one '-' entry per stimulus in each dictionary
cues_vec_3 = []
values_vec = list(dic_stim3.values())
for l in values_vec:
    cues_vec_3.append(l + '+')
for l in values_vec:
    cues_vec_3.append(l + '-')

cues_vec_8 = []
values_vec = list(dic_stim8.values())
for l in values_vec:
    cues_vec_8.append(l + '+')
for l in values_vec:
    cues_vec_8.append(l + '-')
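# Quick look at the label vectors built above. The outputs for the memory-unit
# labels follow directly from the loops; the cue-label counts assume that
# get_dictionary(d) returns d + 2 stimuli, as the network construction in this
# repository (S = d + 2) suggests.
print(mem_vec_c)                          # ['M1-C', 'M2-C', 'M3-C', 'M4-C']
print(mem_vec_h)                          # ['M1-L', 'M2-L', 'M1-C', 'M2-C']
print(len(cues_vec_3), len(cues_vec_8))   # expected 10 and 20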