# Five SMC^2 variants of the same task-set inference model (forward varying
# volatility, constant volatility, varying volatility, precision/noisy, noisy
# forward). Each defines a function named SMC2; in the original project they
# presumably live in separate modules, since the names collide here.
#
# NOTE: smc_c, get_mapping and useful_functions are project-local modules
# (C extensions and helpers) assumed to be on the import path.
import sys
import time
import pickle

import numpy as np
import matplotlib.pyplot as plt
import mcerp
from scipy.stats import beta as betalib
from scipy.stats import gamma as gammalib
from scipy.stats import norm as normlib
from scipy.stats import uniform, entropy

import get_mapping
import smc_c
import useful_functions


def SMC2(td, beta_softmax=1., numberOfStateSamples=200, numberOfThetaSamples=200,
         numberOfBetaSamples=50, coefficient=.5, latin_hyp_sampling=True):
    print('\n')
    print('Forward Varying Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Uniform distribution used to seed the latin hypercube draws
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('Sobol sampling')

    # Extract parameters from the task description
    stimuli = td['S']                   # Sequence of stimuli
    numberOfActions = td['action_num']  # Number of possible actions
    numberOfStimuli = td['state_num']   # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])  # Number of possible task sets
    numberOfTrials = len(stimuli)       # Number of trials

    # Verification
    if K == 2 and not latin_hyp_sampling:
        raise ValueError('latin_hyp_sampling is True by default and has no influence when K == 2; do not change it.')

    # Sampling and prior settings
    betaPrior = np.array([1, 1])   # Prior on beta, the feedback noise parameter
    nuPrior = np.array([3, 1e-3])  # Prior on nu, the variance of the projected Gaussian random walk
    gammaPrior = np.ones(K)        # Prior on gamma, the Dirichlet parameter
    try:
        tauDefault = td['tau'][0]
    except (TypeError, IndexError):
        tauDefault = td['tau']
    log_proba_ = 0.

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(state_num=numberOfStimuli,
                                                       action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probability of each action, updated at every time step -> used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros([numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)

    # SMC particles initialisation
    muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    nuSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    if K == 24:
        try:
            latin_hyp_samples = pickle.load(open('../../utils/sobol_200_26.pkl', 'rb'))
        except IOError:
            latin_hyp_samples = pickle.load(open('../../models/utils/sobol_200_26.pkl', 'rb'))
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 2)
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaPrior[0], betaPrior[1])
            nuSamples[beta_idx] = useful_functions.ppf_inv_gamma(latin_hyp_samples[:, 1], nuPrior[0], nuPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:], gammaPrior)
            gammaSamples[beta_idx] = np.transpose(gammaSamples[beta_idx].T / np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
        nuSamples = useful_functions.sample_inv_gamma(nuPrior[0], nuPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    nuSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentStateSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    currentTauSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.double)
    ancestorStateSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorTauSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.double)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples]) / numberOfStateSamples
    essList = np.zeros(numberOfTrials)

    # Guided SMC variables
    dirichletParamCandidates = np.zeros(K)

    # Loop over trials
    for T in range(numberOfTrials):
        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0:
            print('\n')

        for beta_idx in range(numberOfBetaSamples):
            ances = betaAncestors[beta_idx]

            # Update theta weights
            smc_c.bootstrapUpdateStep_c(
                currentStateSamples[beta_idx], logThetaWeights[beta_idx], currentTauSamples[beta_idx],
                gammaSamples[ances], muSamples[ances] / 2. + 1. / 2, nuSamples[ances], tauDefault, T,
                np.ascontiguousarray(ancestorStateSamples[ances], dtype=np.intc), ancestorTauSamples[ances],
                ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T - 1], actions[T - 1], rewards[T - 1])

            # Degeneracy criterion
            logEss = 2 * useful_functions.log_sum(logThetaWeights[beta_idx]) \
                - useful_functions.log_sum(2 * logThetaWeights[beta_idx])
            essList[T] = np.exp(logEss)

            # Move step
            normalisedThetaWeights[beta_idx] = useful_functions.to_normalized_weights(logThetaWeights[beta_idx])
            if essList[T] < coefficient * numberOfThetaSamples:
                betaMu = np.sum(normalisedThetaWeights[beta_idx] * muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] * (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)

                nuMu = np.sum(normalisedThetaWeights[beta_idx] * nuSamples[ances])
                nuVar = np.sum(normalisedThetaWeights[beta_idx] * (nuSamples[ances] - nuMu)**2)
                nuAlpha = nuMu**2 / nuVar + 2
                nuBeta = nuMu * (nuAlpha - 1)
                assert (nuAlpha > 0)
                assert (nuBeta > 0)

                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] * gammaSamples[ances].T, axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] * (gammaSamples[ances]**2).T, axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / np.sum(dirichletVar) - 1
                dirichletParamCandidates[:] = np.maximum(dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())

                if K == 2:
                    nuSamplesNew[beta_idx] = useful_functions.sample_inv_gamma(nuAlpha, nuBeta, numberOfThetaSamples)
                    muSamplesNew[beta_idx] = np.random.beta(betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(dirichletParamCandidates, numberOfThetaSamples)
                elif K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 2)
                    muSamplesNew[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    nuSamplesNew[beta_idx] = useful_functions.ppf_inv_gamma(latin_hyp_samples[:, 1], nuAlpha, nuBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(gammaSamplesNew[beta_idx].T / np.sum(gammaSamplesNew[beta_idx], axis=1))
                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples
            else:
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                nuSamplesNew[beta_idx] = nuSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # Task-set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(currentStateSamples == ts_idx, axis=2), axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]
        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute the action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] == action_idx], axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik)
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]
        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)
        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # Update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        nuSamples[:] = nuSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorTauSamples[:] = currentTauSamples
        ancestorStateSamples[:] = currentStateSamples

    elapsed_time = time.time() - start_time_multi
    return log_proba_, filt_actionLkd
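
# A minimal usage sketch for the forward varying-volatility fit above. The
# task-description dict is hypothetical: it only mirrors the keys the function
# actually reads ('S', 'action_num', 'state_num', 'reward', 'A_chosen', 'tau');
# a real experiment would load it from the task-generation code. Because the
# five variants in this file share the name SMC2, the demo takes the target
# function as an argument instead of relying on definition order.
def _demo_forward_fit(smc2_fn, n_trials=50):
    td = {
        'S': np.random.randint(0, 2, n_trials).astype(float),         # stimulus sequence
        'action_num': 2,                                              # 2 actions, 2 stimuli -> K = 2 task sets
        'state_num': 2,
        'reward': np.random.randint(0, 2, n_trials).astype(float),    # observed feedback
        'A_chosen': np.random.randint(0, 2, n_trials).astype(float),  # subject's choices
        'tau': np.array([.05]),                                       # default volatility
    }
    log_marginal, filtered_lkd = smc2_fn(td, numberOfThetaSamples=100, numberOfBetaSamples=20)
    # log_marginal estimates the log-evidence of the observed choices;
    # filtered_lkd has shape (trials, beta samples, actions).
    return log_marginal, filtered_lkd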
def SMC2(td, show_progress=False, numberOfStateSamples=1000, numberOfThetaSamples=1000, coefficient=.5):
    print('\n')
    print('Constant Volatility Model')
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Extract parameters from the task description
    stimuli = td['S']                   # Sequence of stimuli
    Z_true = td['Z']                    # Sequence of task sets
    numberOfActions = td['action_num']  # Number of possible actions
    numberOfStimuli = td['state_num']   # Number of states or stimuli
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])  # Number of possible task sets
    numberOfTrials = len(Z_true)        # Number of trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on beta, the feedback noise parameter
    tauPrior = np.array([1, 1])   # Prior on tau, the switch parameter (the volatility)
    gammaPrior = np.ones(K)       # Prior on gamma, the Dirichlet parameter

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(state_num=numberOfStimuli,
                                                       action_num=numberOfActions).T

    actions = np.zeros(numberOfTrials) - 1
    rewards = np.zeros(numberOfTrials, dtype=bool)

    # Keep track of probability correct/exploration after switches
    countPerformance = np.zeros(numberOfTrials)  # Number of correct actions after i trials
    countExploration = np.zeros(numberOfTrials)  # Number of exploratory actions after i trials
    correct_before_switch = np.empty(0)          # The correct task set before a switch
    tsProbability = np.zeros([numberOfTrials, K])
    acceptanceProba = 0.
    volTracking = np.zeros(numberOfTrials)
    volStdTracking = np.zeros(numberOfTrials)    # NB: the *StdTracking arrays hold weighted variances, despite the names
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    acceptance_list = [1.]
    time_list = [start_time_multi]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1], numberOfThetaSamples)
    tauSamples = np.random.beta(tauPrior[0], tauPrior[1], numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    logThetaLks = np.zeros(numberOfThetaSamples)
    currentSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples]) / numberOfStateSamples
    unnormalisedAncestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples])
    essList = np.zeros(numberOfTrials)
    tasksetLikelihood = np.zeros(K)

    # Guided SMC variables
    betaSamplesNew = np.zeros(numberOfThetaSamples)
    tauSamplesNew = np.zeros(numberOfThetaSamples)
    gammaSamplesNew = np.zeros([numberOfThetaSamples, K])
    stateSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    weightsSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    logThetaLksNew = np.zeros(numberOfThetaSamples)
    dirichletParamCandidates = np.zeros(K)
    stateSamplesCandidates = np.zeros(numberOfStateSamples, dtype=np.intc)
    weightsSamplesCandidates = np.zeros(numberOfStateSamples)
    idxTrajectories = np.zeros(numberOfThetaSamples)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))

    # Loop over trials
    for T in range(numberOfTrials):
        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
            time_list.append(time.time() - start_time_multi)
        if (T + 1) % 100 == 0:
            print('\n')

        if T > 0:
            smc_c.guidedUpdateStep_c(
                logThetaLks, logThetaWeights, np.ascontiguousarray(currentSamples), gammaSamples,
                betaSamples / 2. + 1. / 2, tauSamples / 2., T, np.ascontiguousarray(ancestorSamples),
                ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T - 2], stimuli[T - 1],
                rewards[T - 1], actions[T - 1])
            ancestorSamples = np.array(currentSamples)

        # Degeneracy criterion
        logEss = 2 * useful_functions.log_sum(logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights)
        if (essList[T] < coefficient * numberOfThetaSamples) and (acceptance_list[-1] > 0.05):
            acceptanceProba = 0.
            tauMu = np.sum(normalisedThetaWeights * tauSamples)
            tauVar = np.sum(normalisedThetaWeights * (tauSamples - tauMu)**2)
            tauAlpha = ((1 - tauMu) / tauVar - 1 / tauMu) * tauMu**2
            tauBeta = tauAlpha * (1 / tauMu - 1)
            assert (tauAlpha > 0)
            assert (tauBeta > 0)

            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights * (betaSamples - betaMu)**2)
            betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
            betaBeta = betaAlpha * (1 / betaMu - 1)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)

            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T, axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T, axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / np.sum(dirichletVar) - 1
            dirichletParamCandidates = dirichletMeans * dirichletPrecision
            assert ((dirichletParamCandidates > 0).all())

            idxTrajectories = useful_functions.stratified_resampling(normalisedThetaWeights)

            for theta_idx in range(numberOfThetaSamples):
                tauCandidate = np.random.beta(tauAlpha, tauBeta)
                betaCandidate = np.random.beta(betaAlpha, betaBeta)
                gammaCandidate = np.random.dirichlet(dirichletParamCandidates)

                # Launch guided SMC
                logLksCandidate = smc_c.guidedSmc_c(
                    np.ascontiguousarray(stateSamplesCandidates), weightsSamplesCandidates,
                    gammaCandidate, betaCandidate / 2. + 1. / 2, tauCandidate / 2.,
                    np.ascontiguousarray(mapping),
                    np.ascontiguousarray(stimuli[:T], dtype=np.intc),
                    np.ascontiguousarray(rewards[:T], dtype=np.intc),
                    np.ascontiguousarray(actions[:T], dtype=np.intc),
                    numberOfStateSamples)

                # Update a trajectory
                idx_traj = idxTrajectories[theta_idx]
                priorsLogRatio = (useful_functions.log_dirichlet_pdf(gammaCandidate, gammaPrior)
                                  - useful_functions.log_dirichlet_pdf(gammaSamples[idx_traj], gammaPrior))
                transLogRatio = (useful_functions.log_beta_pdf(tauSamples[idx_traj], tauAlpha, tauBeta)
                                 + useful_functions.log_beta_pdf(betaSamples[idx_traj], betaAlpha, betaBeta)
                                 + useful_functions.log_dirichlet_pdf(gammaSamples[idx_traj], dirichletParamCandidates)
                                 - useful_functions.log_beta_pdf(tauCandidate, tauAlpha, tauBeta)
                                 - useful_functions.log_beta_pdf(betaCandidate, betaAlpha, betaBeta)
                                 - useful_functions.log_dirichlet_pdf(gammaCandidate, dirichletParamCandidates))
                logLkdRatio = logLksCandidate - logThetaLks[idx_traj]
                logAlpha = min(0, priorsLogRatio + transLogRatio + logLkdRatio)
                U = np.random.rand()

                # Accept or reject
                if np.log(U) < logAlpha:
                    acceptanceProba += 1.
                    betaSamplesNew[theta_idx] = betaCandidate
                    tauSamplesNew[theta_idx] = tauCandidate
                    gammaSamplesNew[theta_idx] = gammaCandidate
                    stateSamplesNew[theta_idx] = stateSamplesCandidates
                    logThetaLksNew[theta_idx] = logLksCandidate
                    weightsSamplesNew[theta_idx] = weightsSamplesCandidates
                else:
                    betaSamplesNew[theta_idx] = betaSamples[idx_traj]
                    tauSamplesNew[theta_idx] = tauSamples[idx_traj]
                    gammaSamplesNew[theta_idx] = gammaSamples[idx_traj]
                    stateSamplesNew[theta_idx] = ancestorSamples[idx_traj]
                    logThetaLksNew[theta_idx] = logThetaLks[idx_traj]
                    weightsSamplesNew[theta_idx] = ancestorsWeights[idx_traj]

            print('\n')
            print('acceptance ratio is ')
            print(acceptanceProba / numberOfThetaSamples)
            print('\n')
            acceptance_list.append(acceptanceProba / numberOfThetaSamples)

            ancestorsWeights = np.array(weightsSamplesNew)
            logThetaLks = np.array(logThetaLksNew)
            logThetaWeights = np.zeros(numberOfThetaSamples)
            ancestorSamples = np.array(stateSamplesNew)
            betaSamples = np.array(betaSamplesNew)
            tauSamples = np.array(tauSamplesNew)
            gammaSamples = np.array(gammaSamplesNew)
            normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights)

        # Launch bootstrap update
        smc_c.bootstrapUpdateStep_c(
            np.ascontiguousarray(currentSamples), gammaSamples, betaSamples / 2. + 1. / 2,
            tauSamples / 2., T, np.ascontiguousarray(ancestorSamples), ancestorsWeights,
            np.ascontiguousarray(mapping), stimuli[T - 1])

        # Take decision
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(currentSamples == ts_idx, axis=1))

        # Track volatility and feedback-noise estimates
        volTracking[T] = np.sum(normalisedThetaWeights * tauSamples)
        volStdTracking[T] = np.sum(normalisedThetaWeights * (tauSamples - volTracking[T])**2)
        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights * (betaSamples - betaTracking[T])**2)

        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        if show_progress:
            # NB: plt.hold() targets the pre-2.0 matplotlib API.
            plt.subplot(3, 2, 1)
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.hold(True)
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.plot(volTracking[:T], 'b')
            plt.hold(True)
            plt.fill_between(np.arange(T), volTracking[:T] - volStdTracking[:T],
                             volTracking[:T] + volStdTracking[:T],
                             facecolor=[.5, .5, 1], color=[.5, .5, 1])
            plt.plot(td['tau'], 'b--', linewidth=2)
            plt.axis([0, T - 1, 0, .5])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Volatility')

            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]), 'r')
            plt.hold(True)
            plt.plot([betaTracking[T], betaTracking[T]], plt.gca().get_ylim(), 'r', linewidth=2)
            plt.plot([td['beta'], td['beta']], plt.gca().get_ylim(), 'r--', linewidth=2)
            plt.hold(False)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(),
                     [coefficient * numberOfThetaSamples, coefficient * numberOfThetaSamples],
                     'g--', linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.plot(np.divide(countPerformance[:T], np.arange(T) + 1), 'k--', linewidth=2)
            plt.hold(True)
            plt.axis([0, T - 1, 0, 1])
            plt.hold(False)
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()

    elapsed_time = time.time() - start_time_multi

    return [td, tauSamples, volTracking, volStdTracking,
            betaSamples, betaTracking, betaStdTracking,
            gammaSamples, tsProbability, countPerformance, actions,
            acceptance_list, essList, time_list, elapsed_time]
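
# All variants test particle degeneracy with the effective sample size (ESS),
# computed in log space: ESS = (sum_i w_i)^2 / sum_i w_i^2 with
# w_i = exp(logThetaWeights_i). A minimal self-contained sketch, assuming
# useful_functions.log_sum is a standard log-sum-exp (replaced here by scipy's
# logsumexp):
from scipy.special import logsumexp

def _effective_sample_size(log_weights):
    """Numerically stable ESS of unnormalised log-weights."""
    log_ess = 2 * logsumexp(log_weights) - logsumexp(2 * log_weights)
    return np.exp(log_ess)

# Uniform weights give ESS == N, a single dominant weight gives ESS ~= 1:
#   _effective_sample_size(np.zeros(100))        -> 100.0
#   _effective_sample_size(np.array([0., -50.])) -> ~1.0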
def SMC2(td, show_progress=True, numberOfStateSamples=1000, numberOfThetaSamples=1000,
         coefficient=.5, beta_softmax=None):
    print('Varying Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Extract parameters from the task description
    stimuli = td['S']                   # Sequence of stimuli
    Z_true = td['Z']                    # Sequence of task sets
    numberOfActions = td['action_num']  # Number of possible actions
    numberOfStimuli = td['state_num']   # Number of states or stimuli
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])  # Number of possible task sets
    numberOfTrials = len(Z_true)        # Number of trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])   # Prior on beta, the feedback noise parameter
    nuPrior = np.array([3, 1e-3])  # Prior on nu, the variance of the projected Gaussian random walk
    gammaPrior = np.ones(K)        # Prior on gamma, the Dirichlet parameter
    try:
        tauDefault = td['tau'][0]
    except (TypeError, IndexError):
        tauDefault = td['tau']

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(state_num=numberOfStimuli,
                                                       action_num=numberOfActions).T

    # Probability of each action, updated at every time step -> used to take the decision.
    # For one observation, the likelihood of an action requires a marginalisation over all task sets.
    actionLikelihood = np.zeros(numberOfActions)
    actions = np.zeros(numberOfTrials) - 1
    rewards = np.zeros(numberOfTrials, dtype=bool)

    # Keep track of probability correct/exploration after switches
    countPerformance = np.zeros(numberOfTrials)  # Number of correct actions after i trials
    countExploration = np.zeros(numberOfTrials)  # Number of exploratory actions after i trials
    correct_before_switch = np.empty(0)          # The correct task set before a switch
    tsProbability = np.zeros([numberOfTrials, K])
    volTracking = np.zeros(numberOfTrials)       # Volatility over time
    volStdTracking = np.zeros(numberOfTrials)    # NB: the *StdTracking arrays hold weighted variances, despite the names
    nuTracking = np.zeros(numberOfTrials)
    nuStdTracking = np.zeros(numberOfTrials)
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    acceptanceProba = 0.                         # Acceptance probability
    time_list = [start_time_multi]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1], numberOfThetaSamples)
    nuSamples = useful_functions.sample_inv_gamma(nuPrior[0], nuPrior[1], numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    currentStateSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    currentTauSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.double)
    ancestorStateSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorTauSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.double)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples]) / numberOfStateSamples
    unnormalisedAncestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples])
    essList = np.zeros(numberOfTrials)
    tasksetLikelihood = np.zeros(K)

    # Guided SMC variables
    dirichletParamCandidates = np.zeros(K)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))
        plt.ion()

    # Loop over trials
    for T in range(numberOfTrials):
        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
            time_list.append(time.time() - start_time_multi)
        if (T + 1) % 100 == 0:
            print('\n')

        # Update theta weights
        smc_c.bootstrapUpdateStep_c(
            currentStateSamples, logThetaWeights, currentTauSamples, gammaSamples,
            betaSamples / 2. + 1 / 2., nuSamples, tauDefault, T,
            np.ascontiguousarray(ancestorStateSamples, dtype=np.intc), ancestorTauSamples,
            ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T - 1], actions[T - 1], rewards[T - 1])
        ancestorTauSamples = np.array(currentTauSamples)
        ancestorStateSamples = np.array(currentStateSamples)

        # Degeneracy criterion
        logEss = 2 * useful_functions.log_sum(logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights)
        if essList[T] < coefficient * numberOfThetaSamples:
            acceptanceProba = 0.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights * (betaSamples - betaMu)**2)
            betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
            betaBeta = betaAlpha * (1 / betaMu - 1)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)

            nuMu = np.sum(normalisedThetaWeights * nuSamples)
            nuVar = np.sum(normalisedThetaWeights * (nuSamples - nuMu)**2)
            nuAlpha = nuMu**2 / nuVar + 2
            nuBeta = nuMu * (nuAlpha - 1)
            assert (nuAlpha > 0)
            assert (nuBeta > 0)

            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T, axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T, axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / np.sum(dirichletVar) - 1
            dirichletParamCandidates = np.maximum(dirichletMeans * dirichletPrecision, 1.)
            assert ((dirichletParamCandidates > 0).all())

            nuSamples = useful_functions.sample_inv_gamma(nuAlpha, nuBeta, numberOfThetaSamples)
            betaSamples = np.random.beta(betaAlpha, betaBeta, numberOfThetaSamples)
            gammaSamples = np.random.dirichlet(dirichletParamCandidates, numberOfThetaSamples)
            logThetaWeights[:] = 0
            normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights)

        # Take decision
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(currentStateSamples == ts_idx, axis=1))

        # TODO: change!!! take out currentAncestorsWeights
        if beta_softmax is None:
            # Compute the action likelihood
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.sum(tsProbability[T, mapping[stimuli[T]] == action_idx])
            # Select action
            actions[T] = np.argmax(actionLikelihood)
        else:
            # Compute the action likelihood
            tsProbability[T] /= sum(tsProbability[T])
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.exp(
                    np.log(np.sum(tsProbability[T, mapping[stimuli[T].astype(int)] == action_idx])) * beta_softmax)
            actionLikelihood /= sum(actionLikelihood)
            # Select action
            actions[T] = np.where(np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0]

        # Compute vol, nu and beta for tracking
        volTracking[T] = np.sum(normalisedThetaWeights * (np.sum(currentTauSamples, axis=1) / numberOfStateSamples))
        volStdTracking[T] = np.sum(normalisedThetaWeights
                                   * (np.sum(currentTauSamples**2, axis=1) / numberOfStateSamples)) - volTracking[T]**2
        nuTracking[T] = np.sum(normalisedThetaWeights * nuSamples)
        nuStdTracking[T] = np.sum(normalisedThetaWeights * (nuSamples - nuTracking[T])**2)
        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights * (betaSamples - betaTracking[T])**2)

        # Update performance
        if K == 2:
            assert (mapping[stimuli[T].astype(int), Z_true[T].astype(int)] == Z_true[T])
        if (K == 2) and (actions[T] == mapping[stimuli[T].astype(int), Z_true[T].astype(int)]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        elif (K == 24) and (actions[T] == td['A_correct'][T]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        else:
            rewards[T] = td['trap'][T]

        if show_progress:
            plt.subplot(3, 2, 1)
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.hold(True)
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.plot(volTracking[:T], 'b')
            plt.hold(True)
            plt.fill_between(np.arange(T), volTracking[:T] - volStdTracking[:T],
                             volTracking[:T] + volStdTracking[:T],
                             facecolor=[.5, .5, 1], color=[.5, .5, 1])
            plt.plot(td['tau'], 'b--', linewidth=2)
            plt.axis([0, T - 1, 0, .5])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Volatility')

            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, nuTracking[T], nuStdTracking[T]), 'b')
            plt.hold(True)
            plt.plot([nuTracking[T], nuTracking[T]], plt.gca().get_ylim(), 'b', linewidth=2)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]), 'r')
            plt.plot([betaTracking[T], betaTracking[T]], plt.gca().get_ylim(), 'r', linewidth=2)
            plt.plot([td['beta'], td['beta']], plt.gca().get_ylim(), 'r--', linewidth=2)
            plt.hold(False)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(),
                     [coefficient * numberOfThetaSamples, coefficient * numberOfThetaSamples],
                     'g--', linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.plot(np.divide(countPerformance[:T], np.arange(T) + 1), 'k--', linewidth=2)
            plt.hold(True)
            plt.axis([0, T - 1, 0, 1])
            plt.hold(False)
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()
            plt.show()
            plt.pause(0.1)

    elapsed_time = time.time() - start_time_multi

    return [td, nuSamples, nuTracking, nuStdTracking, volTracking, volStdTracking,
            betaSamples, betaTracking, betaStdTracking, gammaSamples, tsProbability,
            countPerformance, actions, essList, time_list, elapsed_time]
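
# The move steps above refit the parameter proposals by moment matching: a
# Beta(a, b) is fitted to the weighted mean/variance of the feedback-noise
# samples, and an inverse-gamma to the nu samples, exactly the algebra used
# inline above. A standalone sketch (function names are illustrative, not
# from the original code):
def _beta_from_moments(mu, var):
    # Beta(a, b): mu = a/(a+b), var = a*b / ((a+b)**2 * (a+b+1))
    a = ((1 - mu) / var - 1 / mu) * mu**2
    b = a * (1 / mu - 1)
    return a, b

def _inv_gamma_from_moments(mu, var):
    # InvGamma(alpha, beta): mu = beta/(alpha-1), var = mu**2/(alpha-2) for alpha > 2
    alpha = mu**2 / var + 2
    beta = mu * (alpha - 1)
    return alpha, beta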
def SMC2(td, show_progress=True, lambdaa=.9, eta=0., inertie_noise=0., numberOfStateSamples=2000,
         numberOfThetaSamples=1000, coefficient=.5, beta_softmax=None, epsilon_softmax=0.):
    print('precision model with lambda = {0}, eta = {1}, epsilon = {4}, inertie_noise = {5}. '
          'Number of state samples: {2}; number of theta samples: {3}'
          .format(lambdaa, eta, numberOfStateSamples, numberOfThetaSamples, epsilon_softmax, inertie_noise))

    # Start timer
    start_time_multi = time.time()

    # Extract parameters from the task description
    stimuli = np.ascontiguousarray(td['S'], dtype=np.intc)  # Sequence of stimuli
    Z = td['Z']                                             # Sequence of task sets
    numberOfActions = td['action_num']                      # Number of possible actions
    numberOfStimuli = td['state_num']                       # Number of states or stimuli
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])  # Number of possible task sets
    numberOfTrials = len(Z)                                 # Number of trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on beta, the feedback noise parameter
    dirichletPrior = np.ones(K)

    # Mapping from task set to correct action per stimulus
    mapping = np.ascontiguousarray(get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T, dtype=np.intc)
    Z_true = Z

    # Probability of each action, updated at every time step -> used to take the decision.
    # For one observation, the likelihood of an action requires a marginalisation over all task sets.
    actionLikelihood = np.zeros(numberOfActions)
    actions = np.ascontiguousarray(np.zeros(numberOfTrials) - 1, dtype=np.intc)
    rewards = np.ascontiguousarray(np.zeros(numberOfTrials), dtype=np.intc)

    # Keep track of probability correct/exploration after switches
    countPerformance = np.zeros(numberOfTrials)  # Number of correct actions after i trials
    countExploration = np.zeros(numberOfTrials)  # Number of exploratory actions after i trials
    correct_before_switch = np.empty(0)          # The correct task set before a switch
    tsProbability = np.zeros([numberOfTrials, K])
    acceptanceProba = 0.
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    temperatureTracking = np.zeros(numberOfTrials)
    temperatureStdTracking = np.zeros(numberOfTrials)
    acceptance_list = [1.]
    transitionProba = np.zeros([numberOfThetaSamples, K, K])

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1], numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(dirichletPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    logThetaLks = np.zeros(numberOfThetaSamples)
    currentTaskSetSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorTaskSetSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    weightsList = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    essList = np.zeros(numberOfTrials)
    tasksetLikelihood = np.zeros(K)
    currentTemperatures = np.zeros(numberOfTrials)
    entropies = np.zeros(numberOfTrials)
    temperature = 0.5

    # Variables for speed-up
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)
    distances = np.zeros([numberOfThetaSamples, 1])
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    noise_amount = np.zeros(numberOfTrials)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))
        plt.ion()

    # Loop over trials
    for T in range(numberOfTrials):
        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0:
            print('\n')

        noise_amount[T] = smc_c.bootstrap_smc_step_c(
            logThetaWeights, distances, betaSamples / 2. + 1 / 2., lambdaa, eta, inertie_noise,
            gammaSamples, currentTaskSetSamples, ancestorTaskSetSamples, weightsList,
            mapping, stimuli[T - 1], rewards[T - 1], actions[T - 1], T, likelihoods,
            positiveStates, ante_proba_local, post_proba_local, ancestorsIndexes,
            gammaAdaptedProba, sum_weightsList, currentNoises, float(temperature))
        assert (temperature is not None)

        entropies[T] = entropy(np.asarray([np.sum(currentTaskSetSamples == i) for i in range(K)])
                               * 1. / (numberOfThetaSamples * numberOfStateSamples))
        ancestorTaskSetSamples[:] = currentTaskSetSamples

        # Degeneracy criterion
        logEss = 2 * useful_functions.log_sum(logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights)
        if essList[T] < coefficient * numberOfThetaSamples and acceptance_list[-1] > 0.05:
            acceptanceProba = 0.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights * (betaSamples - betaMu)**2)
            betaAlpha = np.maximum(((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2, 1)
            betaBeta = np.maximum(betaAlpha * (1 / betaMu - 1), 1.)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)

            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T, axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T, axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / np.sum(dirichletVar) - 1
            dirichletParamCandidates = dirichletMeans * dirichletPrecision
            dirichletParamCandidates = np.maximum(dirichletParamCandidates, 1.)
            assert ((dirichletParamCandidates > 0).all())

            logThetaWeights[:] = 0
            betaSamples = np.random.beta(betaAlpha, betaBeta, numberOfThetaSamples)
            gammaSamples = np.random.dirichlet(dirichletParamCandidates, numberOfThetaSamples)
            normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights)

        # Take decision
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(currentTaskSetSamples == ts_idx, axis=1))

        if beta_softmax is None:
            # Compute the action likelihood
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.sum(tsProbability[T, mapping[stimuli[T]] == action_idx])
            # Select action
            actions[T] = np.argmax(actionLikelihood)
        else:
            # Compute the action likelihood
            tsProbability[T] /= sum(tsProbability[T])
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.exp(
                    np.log(np.sum(tsProbability[T, mapping[stimuli[T].astype(int)] == action_idx])) * beta_softmax)
            actionLikelihood /= sum(actionLikelihood)
            actionLikelihood = actionLikelihood * (1 - epsilon_softmax) + epsilon_softmax / numberOfActions
            # Select action
            actions[T] = np.where(np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0]

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights * (betaSamples - betaTracking[T])**2)
        temperatureTracking[T] = np.mean(currentNoises)
        temperatureStdTracking[T] = np.std(currentNoises)

        # Update performance
        if K == 2:
            assert (mapping[stimuli[T].astype(int), Z_true[T].astype(int)] == Z_true[T])
        if (K == 2) and (actions[T] == mapping[stimuli[T].astype(int), Z_true[T].astype(int)]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        elif (K == 24) and (actions[T] == td['A_correct'][T]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        else:
            rewards[T] = td['trap'][T]

        if show_progress:
            plt.subplot(3, 2, 1)
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.hold(True)
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.plot(temperatureTracking[:T])
            plt.fill_between(np.arange(T),
                             temperatureTracking[:T] - temperatureStdTracking[:T],
                             temperatureTracking[:T] + temperatureStdTracking[:T],
                             facecolor=[.5, .5, 1], color=[.5, .5, 1])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Temperature')

            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]), 'r')
            plt.hold(True)
            plt.plot([betaTracking[T], betaTracking[T]], plt.gca().get_ylim(), 'r', linewidth=2)
            plt.plot([td['beta'], td['beta']], plt.gca().get_ylim(), 'r--', linewidth=2)
            plt.hold(False)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(),
                     [coefficient * numberOfThetaSamples, coefficient * numberOfThetaSamples],
                     'g--', linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.plot(np.divide(countPerformance[:T], np.arange(T) + 1), 'k--', linewidth=2)
            plt.hold(True)
            plt.axis([0, T - 1, 0, 1])
            plt.hold(False)
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()
            plt.show()
            plt.pause(0.1)

    elapsed_time = time.time() - start_time_multi

    return [td, noise_amount, lambdaa, eta, betaSamples, betaTracking, betaStdTracking,
            currentTemperatures, temperatureTracking, temperatureStdTracking, gammaSamples,
            tsProbability, countPerformance, actions, acceptance_list, elapsed_time]
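
# The noisy decision rules above exponentiate each action's marginal task-set
# probability by the softmax inverse temperature, normalise, then mix in a
# uniform lapse term epsilon. A standalone sketch of that rule (names are
# illustrative, not from the original code):
def _action_likelihood(ts_prob, mapping_for_stimulus, beta_softmax, epsilon, n_actions):
    # (sum of task-set probabilities mapped to action a) ** beta, per action
    lik = np.array([np.sum(ts_prob[mapping_for_stimulus == a])**beta_softmax
                    for a in range(n_actions)])
    lik /= lik.sum()
    # epsilon-lapse mixture keeps every action's probability strictly positive
    return lik * (1 - epsilon) + epsilon / n_actions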
def SMC2(td, beta_softmax=1., lambda_noise=.4, eta_noise=.1, epsilon_softmax=0., noise_inertie=0.,
         numberOfStateSamples=200, numberOfThetaSamples=200, numberOfBetaSamples=20,
         coefficient=.5, latin_hyp_sampling=True):
    print('\n')
    print('Noisy Forward Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Uniform distribution used to seed the latin hypercube draws
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('Sobol sampling')

    # Extract parameters from the task description
    stimuli = td['S']                   # Sequence of stimuli
    Z_true = td['Z']                    # Sequence of task sets
    numberOfActions = td['action_num']  # Number of possible actions
    numberOfStimuli = td['state_num']   # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])  # Number of possible task sets
    numberOfTrials = len(Z_true)        # Number of trials
    distances = np.zeros([numberOfThetaSamples, 1])

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on beta, the feedback noise parameter
    gammaPrior = np.ones(K)       # Prior on gamma, the Dirichlet parameter
    log_proba_ = 0.

    # Verification
    if K == 2 and not latin_hyp_sampling:
        raise ValueError('latin_hyp_sampling is True by default and has no influence when K == 2; do not change it.')

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(state_num=numberOfStimuli,
                                                       action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaLog = np.zeros(numberOfBetaSamples)
    logbetaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probability of each action, updated at every time step -> used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros([numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)
    dirichletParamCandidates = np.zeros(K)

    # SMC particles initialisation
    muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])  # np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    if K == 24:
        try:
            latin_hyp_samples = pickle.load(open('../../utils/sobol_200_25.pkl', 'rb'))
        except IOError:
            latin_hyp_samples = pickle.load(open('../../models/utils/sobol_200_25.pkl', 'rb'))
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 1)
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaPrior[0], betaPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 1:], gammaPrior)
            gammaSamples[beta_idx] = np.transpose(gammaSamples[beta_idx].T / np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    weightsList = np.ones([numberOfThetaSamples, numberOfStateSamples]) / numberOfStateSamples
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    log_proba_corr = 0.
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)

    # Guided SMC variables
    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    temperatures = np.zeros(numberOfBetaSamples)
    temperatureAncestors = np.zeros(numberOfBetaSamples) + .5

    # Loop over trials
    for T in range(numberOfTrials):
        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0:
            print('\n')

        for beta_idx in range(numberOfBetaSamples):
            ances = betaAncestors[beta_idx]

            temperatures[beta_idx] = smc_c.bootstrap_smc_step_c(
                logThetaWeights[beta_idx], distances, muSamples[ances] / 2. + 1. / 2,
                lambda_noise, eta_noise, noise_inertie, gammaSamples[ances],
                currentSamples[beta_idx], ancestorSamples[ances], weightsList,
                np.ascontiguousarray(mapping), stimuli[T - 1], rewards[T - 1], actions[T - 1], T,
                likelihoods, positiveStates, ante_proba_local, post_proba_local,
                ancestorsIndexes, gammaAdaptedProba, sum_weightsList, currentNoises,
                temperatureAncestors[ances])

            # Move step
            normalisedThetaWeights[beta_idx] = useful_functions.to_normalized_weights(logThetaWeights[beta_idx])
            ess = 1. / np.sum(normalisedThetaWeights[beta_idx]**2)
            if ess < coefficient * numberOfThetaSamples:
                acceptanceProba = 0.
                betaMu = np.sum(normalisedThetaWeights[beta_idx] * muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] * (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)

                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] * gammaSamples[ances].T, axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] * (gammaSamples[ances]**2).T, axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / np.sum(dirichletVar) - 1
                dirichletParamCandidates[:] = np.maximum(dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())

                if K == 2:
                    muSamplesNew[beta_idx] = np.random.beta(betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(dirichletParamCandidates, numberOfThetaSamples)
                elif K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 1)
                    muSamplesNew[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 1:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(gammaSamplesNew[beta_idx].T / np.sum(gammaSamplesNew[beta_idx], axis=1))
                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples
            else:
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # Task-set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(currentSamples == ts_idx, axis=2), axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]
        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute the action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] == action_idx], axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik) * (1 - epsilon_softmax) \
            + epsilon_softmax / numberOfActions
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]
        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)
        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # Update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorSamples[:] = currentSamples
        temperatureAncestors[:] = temperatures

    elapsed_time = time.time() - start_time_multi
    return log_proba_
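
# Every variant above resamples particles with
# useful_functions.stratified_resampling. A minimal sketch of the classic
# algorithm, assuming normalised weights; this is the common textbook
# implementation, not necessarily the one in useful_functions:
def _stratified_resampling(weights):
    n = len(weights)
    # one uniform draw per stratum [i/n, (i+1)/n)
    positions = (np.random.rand(n) + np.arange(n)) / n
    indexes = np.searchsorted(np.cumsum(weights), positions)
    # guard against floating-point round-off in the cumulative sum
    return np.minimum(indexes, n - 1).astype(np.intc)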