def training_seq_pred(self,N_trial,d,stop=True,verb=True,policy='eps_greedy',stoc='soft',t_weighted=True):
    # Train the network on the sequence-prediction task.
    #
    # Each trial presents a stimulus sequence S with a single target
    # output O; reward is delivered only at the final time step
    # (r = 0 on all earlier steps), and the weights are updated at
    # every step from the reward prediction error RPE = r - q_old.
    #
    # NOTE(review): indentation reconstructed from a flattened source
    # line — confirm block boundaries (per-step weight update inside the
    # inner loop; per-trial response print after it) against the
    # original file.
    #
    # Params:
    #   N_trial    -- number of training trials.
    #   d          -- task parameter forwarded to construct_trial /
    #                 get_dictionary (presumably sequence depth/length
    #                 — TODO confirm against TASKS.task_seq_prediction).
    #   stop       -- if True, stop training once convergence is reached.
    #   verb       -- if True, print a per-trial stimulus/response trace.
    #   policy, stoc, t_weighted -- forwarded to self.compute_response.
    # Returns:
    #   (E, conv_tr) -- E[tr] = 1 if the final response of trial tr was
    #   wrong; conv_tr = trial index at convergence (0 if never reached).
    from TASKS.task_seq_prediction import construct_trial, get_dictionary
    zero = np.zeros((1,self.S))  # blank "previous stimulus" at trial start
    corr = 0                     # consecutive correct trials so far
    conv_tr = 0
    E = np.zeros(N_trial)
    dic_stim, dic_resp = get_dictionary(d)
    convergence=False
    for tr in np.arange(N_trial):
        if verb:
            print('TRIAL N.',tr+1,':\t', end="")
        #self.reset_memory()
        #self.reset_tags()
        S, O = construct_trial(d,0.5)
        o_print = dic_resp[np.argmax(O)]  # label of the desired final response
        s_old = zero
        for i in np.arange(np.shape(S)[0]):
            s_inst = S[i:(i+1),:]
            # Transient input: change of the stimulus w.r.t. the previous step.
            s_trans = self.define_transient(s_inst, s_old)
            s_old = s_inst
            s_print = dic_stim[np.argmax(s_inst)]
            y_r,y_m,Q = self.feedforward(s_inst, s_trans)
            resp_ind,P_vec = self.compute_response(Q,policy,stoc,t_weighted)
            q = Q[0,resp_ind]            # Q-value of the selected action
            z = np.zeros(np.shape(Q))
            z[0,resp_ind] = 1            # one-hot vector of the selected action
            r_print = self.dic_resp[resp_ind]
            if verb:
                print(s_print,end="-")
            self.update_tags(s_inst,s_trans,y_r,y_m,z,resp_ind)
            q_old = q
            if i==np.shape(S)[0]-1:
                # Reward is delivered only at the last step of the sequence.
                if dic_resp[resp_ind]==o_print:
                    r = self.rew_pos
                    corr += 1
                else:
                    r = self.rew_neg
                    E[tr] = 1
                    corr = 0
            else:
                r = 0
            RPE = r - q_old # Reward Prediction Error
            self.update_weights(RPE)
        if verb:
            print(dic_resp[resp_ind],'\t(',corr,')')
        # Convergence criterion: 100 consecutive correct trials.
        if corr>=100 and convergence==False:
            conv_tr = tr
            convergence=True
            if stop==True:
                break
    if conv_tr!=0:
        print('SIMULATION CONVERGED AT TRIAL ',conv_tr)
    return E, conv_tr
def training_seq_pred_CPT(self,N_trial,d,stop=True,verb=True,policy='eps_greedy',stoc='soft',t_weighted=True,e_weighted=False):
    # Train the network on the CPT variant of the sequence-prediction
    # task: unlike training_seq_pred, a target output o and a reward are
    # defined at EVERY time step, and a SARSA-style update
    # RPE = (r + discount*q) - q_old is applied from the second step on,
    # followed by one terminal update RPE = r - q_old after the trial.
    #
    # BUG FIX: the original body called
    #   self.compute_response(Q,policy,stoc,t_weighted,e_weighted)
    # with none of policy/stoc/t_weighted/e_weighted defined in this
    # method (guaranteed NameError). They are now keyword parameters with
    # defaults matching the sibling training_seq_pred (e_weighted=False
    # is assumed — TODO confirm the intended default for this method).
    # Adding keyword defaults is backward-compatible for existing callers.
    #
    # Params:
    #   N_trial    -- number of training trials.
    #   d          -- task parameter forwarded to construct_trial /
    #                 get_dictionary (presumably sequence depth/length
    #                 — TODO confirm against TASKS.task_seq_prediction_CPT).
    #   stop       -- if True, stop training once convergence is reached.
    #   verb       -- if True, print a per-trial response trace.
    #   policy, stoc, t_weighted, e_weighted -- forwarded to
    #                 self.compute_response.
    # Returns:
    #   (E, conv_tr) -- E[tr] = 1 if any step of trial tr was answered
    #   wrongly; conv_tr = trial index at convergence (0 if never reached).
    from TASKS.task_seq_prediction_CPT import construct_trial, get_dictionary
    zero = np.zeros((1,self.S))  # blank "previous stimulus" at trial start
    corr = 0                     # consecutive correct responses so far
    conv_tr = 0
    E = np.zeros(N_trial)
    dic_stim, dic_resp = get_dictionary(d)
    convergence=False
    for tr in np.arange(N_trial):
        if verb:
            print('TRIAL N.',tr+1,':\t', end="")
        first = True  # no SARSA update on the very first step (no q_old yet)
        #self.reset_memory()
        #self.reset_tags()
        S, O = construct_trial(d,0.5)
        s_old = zero
        for i in np.arange(np.shape(S)[0]):
            s_inst = S[i:(i+1),:]
            # Transient input: change of the stimulus w.r.t. the previous step.
            s_trans = self.define_transient(s_inst, s_old)
            s_old = s_inst
            s_print = dic_stim[np.argmax(s_inst)]
            #print(self.sTRACE)
            o = O[i:(i+1),:]                 # per-step target (CPT variant)
            o_print = dic_resp[np.argmax(o)]
            y_r,y_m,Q = self.feedforward(s_inst, s_trans)
            resp_ind,P_vec = self.compute_response(Q,policy,stoc,t_weighted,e_weighted)
            q = Q[0,resp_ind]                # Q-value of the selected action
            z = np.zeros(np.shape(Q))
            z[0,resp_ind] = 1                # one-hot vector of the selected action
            r_print = dic_resp[resp_ind]
            if verb:
                if i==0:
                    print(s_print,end="-")
                    print(r_print,end="-")
                elif i==np.shape(S)[0]-1:
                    print(r_print,'\t(',corr,')')
                else:
                    print(r_print,end="-")
            if first!=True:
                # SARSA update using the reward r from the PREVIOUS step
                # and the discounted Q-value of the current action.
                RPE = (r + self.discount*q) - q_old # Reward Prediction Error
                self.update_weights(RPE)
            else:
                first = False
            self.update_tags(s_inst,s_trans,y_r,y_m,z,resp_ind)
            q_old = q
            if r_print==o_print:
                # "final" flags targets that are one of the last two actions
                # (presumably the sequence-terminating responses — verify).
                final = (o_print==dic_resp[self.A-2] or o_print==dic_resp[self.A-1])
                r = self.positive_reward_seq_pred(final,self.A-2)
                corr += 1
            else:
                r = self.rew_neg
                E[tr] = 1
                corr = 0
            #print('\t',s_print," - ",r_print,' (',o_print,')','--->',r)
        # Terminal update: the trial is over, so there is no successor Q.
        RPE = r - q_old # Reward Prediction Error
        self.update_weights(RPE)
        # Convergence criterion: 100 consecutive correct responses.
        if corr>=100 and convergence==False:
            conv_tr = tr
            convergence=True
            if stop==True:
                break
    if conv_tr!=0:
        print('SIMULATION CONVERGED AT TRIAL ',conv_tr)
    return E, conv_tr
def training_tXOR(self, N_trial, stop=True, verb=True):
    # Train the network on the temporal-XOR task.
    #
    # Each trial has exactly 3 time steps: two input stimuli on steps
    # 0 and 1, then a response step (i == 2) where the network must
    # output their XOR. Reward is delivered only on the final step
    # (r = 0 otherwise); weights are updated every step from
    # RPE = r - q_old.
    #
    # NOTE(review): indentation reconstructed from a flattened source
    # line — confirm block boundaries (per-step weight update inside the
    # inner loop; per-trial response print after it) against the
    # original file.
    #
    # Params:
    #   N_trial -- number of training trials.
    #   stop    -- if True, stop training once convergence is reached.
    #   verb    -- if True, print a per-trial stimulus/response trace.
    # Returns:
    #   (E, conv_tr) -- E[tr] = 1 if the final response of trial tr was
    #   wrong; conv_tr = trial index at convergence (0 if never reached).
    from TASKS.task_tXOR import construct_trial
    zero = np.zeros((1, self.S))  # blank "previous stimulus" at trial start
    corr = 0                      # consecutive correct trials so far
    conv_tr = 0
    E = np.zeros(N_trial)
    convergence = False
    for tr in np.arange(N_trial):
        if verb:
            print('TRIAL N.', tr + 1, ':\t', end="")
        self.reset_memory()
        self.reset_tags()
        S, O = construct_trial()
        o_print = self.dic_resp[np.argmax(O)]  # label of the desired XOR response
        s_old = zero
        for i in np.arange(3):
            s_inst = S[i:(i + 1), :]
            # Transient input: change of the stimulus w.r.t. the previous step.
            s_trans = self.define_transient(s_inst, s_old)
            s_old = s_inst
            if i != 2:
                # Step 2 is the response step; only steps 0-1 carry a stimulus label.
                s_print = self.dic_stim[np.argmax(s_inst)]
            y_r, y_m, Q = self.feedforward(s_inst, s_trans)
            resp_ind, _ = self.compute_response(Q)
            q = Q[0, resp_ind]            # Q-value of the selected action
            z = np.zeros(np.shape(Q))
            z[0, resp_ind] = 1            # one-hot vector of the selected action
            r_print = self.dic_resp[resp_ind]
            if verb and i != 2:
                print(s_print, end="-")
            self.update_tags(s_inst, s_trans, y_r, y_m, z, resp_ind)
            q_old = q
            if i == 2:
                # Reward is delivered only on the response step.
                if self.dic_resp[resp_ind] == o_print:
                    r = self.rew_pos
                    corr += 1
                else:
                    r = self.rew_neg
                    E[tr] = 1
                    corr = 0
            else:
                r = 0
            RPE = r - q_old  # Reward Prediction Error
            self.update_weights(RPE)
        if verb:
            print(self.dic_resp[resp_ind], '\t(', corr, ')')
        # Convergence criterion: 100 consecutive correct trials.
        if corr >= 100 and convergence == False:
            conv_tr = tr
            convergence = True
            if stop == True:
                break
    if conv_tr != 0:
        print('SIMULATION CONVERGED AT TRIAL ', conv_tr)
    return E, conv_tr