def SMC2(td, show_progress=False, numberOfStateSamples=1000, numberOfThetaSamples=1000, coefficient=.5): print('\n') print('Constant Volatility Model') print('\n') #Start timer start_time_multi = time.time() # Extract parameters from task description stimuli = td['S'] # Sequence of Stimuli Z_true = td['Z'] # Sequence of Task Sets numberOfActions = td['action_num'] # Number of Actions possible numberOfStimuli = td['state_num'] # Number of states or stimuli K = np.prod( np.arange(numberOfActions + 1)[-numberOfStimuli:]) # Number of possible Task Sets numberOfTrials = len(Z_true) # Number of Trials # Sampling and prior settings betaPrior = np.array([1, 1]) # Prior on Beta, the feedback noise parameter tauPrior = np.array( [1, 1]) # Prior on Tau, the switch parameter (the volatility) gammaPrior = numpy.ones(K) # Prior on Gamma, the Dirichlet parameter # Mapping from task set to correct action per stimulus mapping = get_mapping.Get_TaskSet_Stimulus_Mapping( state_num=numberOfStimuli, action_num=numberOfActions).T actions = np.zeros(numberOfTrials) - 1 rewards = np.zeros(numberOfTrials, dtype=bool) # Keep track of probability correct/exploration after switches countPerformance = np.zeros( numberOfTrials) # Number of correct actions after i trials countExploration = np.zeros( numberOfTrials) # Number of exploratory actions after i trials correct_before_switch = np.empty(0) # The correct task set before switch tsProbability = np.zeros([numberOfTrials, K]) acceptanceProba = 0. volTracking = np.zeros(numberOfTrials) volStdTracking = np.zeros(numberOfTrials) betaTracking = np.zeros(numberOfTrials) betaStdTracking = np.zeros(numberOfTrials) acceptance_list = [1.] time_list = [start_time_multi] # SMC particles initialisation betaSamples = np.random.beta(betaPrior[0], betaPrior[1], numberOfThetaSamples) tauSamples = np.random.beta(tauPrior[0], tauPrior[1], numberOfThetaSamples) gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples) logThetaWeights = np.zeros(numberOfThetaSamples) logThetaLks = np.zeros(numberOfThetaSamples) currentSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) ancestorSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples ]) / numberOfStateSamples unnormalisedAncestorsWeights = np.ones( [numberOfThetaSamples, numberOfStateSamples]) essList = np.zeros(numberOfTrials) tasksetLikelihood = np.zeros(K) # Guided SMC variables betaSamplesNew = np.zeros(numberOfThetaSamples) tauSamplesNew = np.zeros(numberOfThetaSamples) gammaSamplesNew = np.zeros([numberOfThetaSamples, K]) stateSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) weightsSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples]) logThetaLksNew = np.zeros(numberOfThetaSamples) dirichletParamCandidates = np.zeros(K) stateSamplesCandidates = np.zeros(numberOfStateSamples, dtype=np.intc) weightsSamplesCandidates = np.zeros(numberOfStateSamples) idxTrajectories = np.zeros(numberOfThetaSamples) # Plot progress if show_progress: plt.figure(figsize=(12, 9)) # Loop over trials for T in range(numberOfTrials): # Print progress if (T + 1) % 10 == 0: sys.stdout.write(' ' + str(T + 1)) sys.stdout.flush() time_list.append(time.time() - start_time_multi) if (T + 1) % 100 == 0: print('\n') if T > 0: smc_c.guidedUpdateStep_c(logThetaLks, logThetaWeights, np.ascontiguousarray(currentSamples), gammaSamples, betaSamples/2. 
+ 1./2, tauSamples/2., T, np.ascontiguousarray(ancestorSamples), ancestorsWeights, \ np.ascontiguousarray(mapping), stimuli[T-2], stimuli[T-1], rewards[T-1], actions[T-1]) ancestorSamples = np.array(currentSamples) # Degeneray criterion logEss = 2 * useful_functions.log_sum( logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights) essList[T] = np.exp(logEss) # Move step normalisedThetaWeights = useful_functions.to_normalized_weights( logThetaWeights) if (essList[T] < coefficient * numberOfThetaSamples) and ( acceptance_list[-1] > 0.05): acceptanceProba = 0. tauMu = np.sum(normalisedThetaWeights * tauSamples) tauVar = np.sum(normalisedThetaWeights * (tauSamples - tauMu)**2) tauAlpha = ((1 - tauMu) / tauVar - 1 / tauMu) * tauMu**2 tauBeta = tauAlpha * (1 / tauMu - 1) assert (tauAlpha > 0) assert (tauBeta > 0) betaMu = np.sum(normalisedThetaWeights * betaSamples) betaVar = np.sum(normalisedThetaWeights * (betaSamples - betaMu)**2) betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2 betaBeta = betaAlpha * (1 / betaMu - 1) assert (betaAlpha > 0) assert (betaBeta > 0) dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T, axis=1) dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T, axis=1) - dirichletMeans**2 dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / ( np.sum(dirichletVar)) - 1 dirichletParamCandidates = dirichletMeans * dirichletPrecision assert ((dirichletParamCandidates > 0).all()) idxTrajectories = useful_functions.stratified_resampling( normalisedThetaWeights) for theta_idx in range(numberOfThetaSamples): tauCandidate = np.random.beta(tauAlpha, tauBeta) betaCandidate = np.random.beta(betaAlpha, betaBeta) gammaCandidate = np.random.dirichlet(dirichletParamCandidates) # Launch guidedSMC logLksCandidate = smc_c.guidedSmc_c(np.ascontiguousarray(stateSamplesCandidates), weightsSamplesCandidates, gammaCandidate, betaCandidate/2. + 1./2, tauCandidate/2., np.ascontiguousarray(mapping), \ np.ascontiguousarray(stimuli[:T], dtype=np.intc), np.ascontiguousarray(rewards[:T], dtype=np.intc), np.ascontiguousarray(actions[:T], dtype=np.intc), numberOfStateSamples) # Update a trajectory idx_traj = idxTrajectories[theta_idx] priorsLogRatio = useful_functions.log_dirichlet_pdf( gammaCandidate, gammaPrior) - useful_functions.log_dirichlet_pdf( gammaSamples[idx_traj], gammaPrior) transLogRatio = useful_functions.log_beta_pdf(tauSamples[idx_traj], tauAlpha, tauBeta) + useful_functions.log_beta_pdf(betaSamples[idx_traj], betaAlpha, betaBeta) + useful_functions.log_dirichlet_pdf(gammaSamples[idx_traj], dirichletParamCandidates) - \ useful_functions.log_beta_pdf(tauCandidate, tauAlpha, tauBeta) - useful_functions.log_beta_pdf(betaCandidate, betaAlpha, betaBeta) - useful_functions.log_dirichlet_pdf(gammaCandidate, dirichletParamCandidates) logLkdRatio = logLksCandidate - logThetaLks[idx_traj] logAlpha = min(0, priorsLogRatio + transLogRatio + logLkdRatio) U = np.random.rand() # Accept or Reject if np.log(U) < logAlpha: acceptanceProba += 1. 
betaSamplesNew[theta_idx] = betaCandidate tauSamplesNew[theta_idx] = tauCandidate gammaSamplesNew[theta_idx] = gammaCandidate stateSamplesNew[theta_idx] = stateSamplesCandidates logThetaLksNew[theta_idx] = logLksCandidate weightsSamplesNew[theta_idx] = weightsSamplesCandidates else: betaSamplesNew[theta_idx] = betaSamples[idx_traj] tauSamplesNew[theta_idx] = tauSamples[idx_traj] gammaSamplesNew[theta_idx] = gammaSamples[idx_traj] stateSamplesNew[theta_idx] = ancestorSamples[idx_traj] logThetaLksNew[theta_idx] = logThetaLks[idx_traj] weightsSamplesNew[theta_idx] = ancestorsWeights[idx_traj] print('\n') print('acceptance ratio is ') print(acceptanceProba / numberOfThetaSamples) print('\n') acceptance_list.append(acceptanceProba / numberOfThetaSamples) ancestorsWeights = np.array(weightsSamplesNew) logThetaLks = np.array(logThetaLksNew) logThetaWeights = np.zeros(numberOfThetaSamples) ancestorSamples = np.array(stateSamplesNew) betaSamples = np.array(betaSamplesNew) tauSamples = np.array(tauSamplesNew) gammaSamples = np.array(gammaSamplesNew) normalisedThetaWeights = useful_functions.to_normalized_weights( logThetaWeights) # Launch bootstrap update smc_c.bootstrapUpdateStep_c(np.ascontiguousarray(currentSamples), gammaSamples, betaSamples / 2. + 1. / 2, tauSamples / 2., T, np.ascontiguousarray(ancestorSamples), ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T - 1]) # Take decision for ts_idx in range(K): tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum( (currentSamples == ts_idx), axis=1)) # Select action and compute vol volTracking[T] = np.sum(normalisedThetaWeights * tauSamples) volStdTracking[T] = np.sum(normalisedThetaWeights * (tauSamples - volTracking[T])**2) betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples) betaStdTracking[T] = np.sum(normalisedThetaWeights * (betaSamples - betaTracking[T])**2) rewards[T] = td['reward'][T] actions[T] = td['A_chosen'][T] if show_progress: plt.subplot(3, 2, 1) plt.imshow(tsProbability[:T].T, aspect='auto') plt.hold(True) plt.plot(Z_true[:T], 'w--') plt.axis([0, T - 1, 0, K - 1]) # For speed plt.hold(False) plt.xlabel('trials') plt.ylabel('p(TS|past) at current time') plt.subplot(3, 2, 2) plt.plot(volTracking[:T], 'b') plt.hold(True) plt.fill_between(np.arange(T), volTracking[:T] - volStdTracking[:T], volTracking[:T] + volStdTracking[:T], facecolor=[.5, .5, 1], color=[.5, .5, 1]) plt.plot(td['tau'], 'b--', linewidth=2) plt.axis([0, T - 1, 0, .5]) # For speed plt.hold(False) plt.xlabel('trials') plt.ylabel('Volatility') plt.subplot(3, 2, 3) x = np.linspace(0.01, .99, 100) plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]), 'r') plt.hold(True) plt.plot([betaTracking[T], betaTracking[T]], plt.gca().get_ylim(), 'r', linewidth=2) plt.plot([td['beta'], td['beta']], plt.gca().get_ylim(), 'r--', linewidth=2) plt.hold(False) plt.xlabel('Parameters') plt.ylabel('Gaussian pdf') plt.subplot(3, 2, 4) plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2) plt.hold(True) plt.plot(plt.gca().get_xlim(), [ coefficient * numberOfThetaSamples, coefficient * numberOfThetaSamples ], 'g--', linewidth=2) plt.axis([0, T - 1, 0, numberOfThetaSamples]) plt.hold(False) plt.xlabel('trials') plt.ylabel('ESS') plt.subplot(3, 2, 5) plt.plot(np.divide(countPerformance[:T], np.arange(T) + 1), 'k--', linewidth=2) plt.hold(True) plt.axis([0, T - 1, 0, 1]) plt.hold(False) plt.xlabel('Trials') plt.ylabel('Performance') plt.draw() elapsed_time = time.time() - start_time_multi return [ td, tauSamples, volTracking, volStdTracking, 
betaSamples, betaTracking, betaStdTracking, gammaSamples, tsProbability, countPerformance, actions, acceptance_list, essList, time_list, elapsed_time ]
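# --- Illustration: degeneracy criterion used above (not part of the original code) ---
# The move step in SMC2 is triggered when the effective sample size (ESS) of the theta
# particles drops below coefficient * numberOfThetaSamples. A minimal standalone sketch of
# that criterion, assuming useful_functions.log_sum is a numerically stable log-sum-exp;
# log_ess_from_log_weights is a hypothetical helper name used for illustration only.
from scipy.special import logsumexp


def log_ess_from_log_weights(log_weights):
    """log ESS = 2*logsumexp(w) - logsumexp(2*w) for unnormalised log-weights w."""
    return 2. * logsumexp(log_weights) - logsumexp(2. * log_weights)


# Usage mirroring the loop above:
#   essList[T] = np.exp(log_ess_from_log_weights(logThetaWeights))
#   if essList[T] < coefficient * numberOfThetaSamples: ...trigger the move step...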
def SMC2(td, beta_softmax=1., numberOfStateSamples=200, numberOfThetaSamples=200, numberOfBetaSamples=50, coefficient=.5, latin_hyp_sampling=True): print('\n') print('Forward Varying Volatility Model') print('number of theta samples ' + str(numberOfThetaSamples)) print('\n') start_time_multi = time.time() # uniform distribution if latin_hyp_sampling: d0 = uniform() print('latin hypercube sampling') else: print('sobolev sampling') # Extract parameters from task description stimuli = td['S'] # Sequence of Stimuli numberOfActions = td['action_num'] # Number of Actions possible numberOfStimuli = td['state_num'] # Number of states or stimuli rewards = td['reward'] actions = td['A_chosen'] K = np.prod( np.arange(numberOfActions + 1)[-numberOfStimuli:]) # Number of possible Task Sets numberOfTrials = len(stimuli) # Number of Trials # verification if K == 2: if latin_hyp_sampling == False: raise ValueError( 'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.' ) # Sampling and prior settings betaPrior = np.array([1, 1]) # Prior on Beta, the feedback noise parameter nuPrior = np.array([ 3, 1e-3 ]) # Prior on Nu, the variance on the projected gaussian random walk gammaPrior = numpy.ones(K) # Prior on Gamma, the Dirichlet parameter try: tauDefault = td['tau'][0] except: tauDefault = td['tau'] log_proba_ = 0. # Mapping from task set to correct action per stimulus mapping = get_mapping.Get_TaskSet_Stimulus_Mapping( state_num=numberOfStimuli, action_num=numberOfActions).T betaWeights = np.zeros(numberOfBetaSamples) betaAncestors = np.arange(numberOfBetaSamples) # Probabilities of every actions updated at every time step -> Used to take the decision actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions]) sum_actionLik = np.zeros(numberOfBetaSamples) filt_actionLkd = np.zeros( [numberOfTrials, numberOfBetaSamples, numberOfActions]) # Keep track of probability correct/exploration after switches tsProbability = np.zeros([numberOfBetaSamples, K]) sum_tsProbability = np.zeros(numberOfBetaSamples) # SMC particles initialisation muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) nuSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K]) if K == 24: try: latin_hyp_samples = pickle.load( open('../../utils/sobol_200_26.pkl', 'rb')) except: latin_hyp_samples = pickle.load( open('../../models/utils/sobol_200_26.pkl', 'rb')) for beta_idx in range(numberOfBetaSamples): if latin_hyp_sampling: latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 2) muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaPrior[0], betaPrior[1]) nuSamples[beta_idx] = useful_functions.ppf_inv_gamma( latin_hyp_samples[:, 1], nuPrior[0], nuPrior[1]) gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:], gammaPrior) gammaSamples[beta_idx] = np.transpose( gammaSamples[beta_idx].T / np.sum(gammaSamples[beta_idx], axis=1)) elif K == 2: muSamples = np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples]) nuSamples = useful_functions.sample_inv_gamma( nuPrior[0], nuPrior[1], [numberOfBetaSamples, numberOfThetaSamples]) gammaSamples = np.random.dirichlet( gammaPrior, [numberOfBetaSamples, numberOfThetaSamples]) else: raise IndexError('Wrong number of task sets') muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) nuSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) gammaSamplesNew = 
np.zeros([numberOfBetaSamples, numberOfThetaSamples, K]) logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) normalisedThetaWeights = np.zeros( [numberOfBetaSamples, numberOfThetaSamples]) logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) currentStateSamples = np.zeros( [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) currentTauSamples = np.zeros( [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.double) ancestorStateSamples = np.zeros( [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) ancestorTauSamples = np.zeros( [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.double) ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples ]) / numberOfStateSamples essList = np.zeros(numberOfTrials) # Guided SMC variables dirichletParamCandidates = np.zeros(K) # Loop over trials for T in range(numberOfTrials): # Print progress if (T + 1) % 10 == 0: sys.stdout.write(' ' + str(T + 1)) sys.stdout.flush() if (T + 1) % 100 == 0: print('\n') for beta_idx in range(numberOfBetaSamples): ances = betaAncestors[beta_idx] # Update theta weights smc_c.bootstrapUpdateStep_c(currentStateSamples[beta_idx], logThetaWeights[beta_idx], currentTauSamples[beta_idx], gammaSamples[ances], muSamples[ances]/2. + 1./2, nuSamples[ances], tauDefault, T, \ np.ascontiguousarray(ancestorStateSamples[ances], dtype=np.intc), ancestorTauSamples[ances], ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T-1], actions[T-1], rewards[T-1]) # Degeneray criterion logEss = 2 * useful_functions.log_sum( logThetaWeights[beta_idx]) - useful_functions.log_sum( 2 * logThetaWeights[beta_idx]) essList[T] = np.exp(logEss) # Move step normalisedThetaWeights[ beta_idx] = useful_functions.to_normalized_weights( logThetaWeights[beta_idx]) if (essList[T] < coefficient * numberOfThetaSamples): betaMu = np.sum(normalisedThetaWeights[beta_idx] * muSamples[ances]) betaVar = np.sum(normalisedThetaWeights[beta_idx] * (muSamples[ances] - betaMu)**2) betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2 betaBeta = betaAlpha * (1 / betaMu - 1) assert (betaAlpha > 0) assert (betaBeta > 0) nuMu = np.sum(normalisedThetaWeights[beta_idx] * nuSamples[ances]) nuVar = np.sum(normalisedThetaWeights[beta_idx] * (nuSamples[ances] - nuMu)**2) nuAlpha = nuMu**2 / nuVar + 2 nuBeta = nuMu * (nuAlpha - 1) assert (nuAlpha > 0) assert (nuBeta > 0) dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] * gammaSamples[ances].T, axis=1) dirichletVar = np.sum(normalisedThetaWeights[beta_idx] * (gammaSamples[ances]**2).T, axis=1) - dirichletMeans**2 dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2 ) / (np.sum(dirichletVar)) - 1 dirichletParamCandidates[:] = np.maximum( dirichletMeans * dirichletPrecision, 1.) 
assert ((dirichletParamCandidates > 0).all()) if K == 2: nuSamplesNew[beta_idx] = useful_functions.sample_inv_gamma( nuAlpha, nuBeta, numberOfThetaSamples) muSamplesNew[beta_idx] = np.random.beta( betaAlpha, betaBeta, numberOfThetaSamples) gammaSamplesNew[beta_idx] = np.random.dirichlet( dirichletParamCandidates, numberOfThetaSamples) elif K == 24: if latin_hyp_sampling: latin_hyp_samples = mcerp.lhd( dist=d0, size=numberOfThetaSamples, dims=K + 2) muSamplesNew[beta_idx] = betalib.ppf( latin_hyp_samples[:, 0], betaAlpha, betaBeta) nuSamplesNew[beta_idx] = useful_functions.ppf_inv_gamma( latin_hyp_samples[:, 1], nuAlpha, nuBeta) gammaSamplesNew[beta_idx] = gammalib.ppf( latin_hyp_samples[:, 2:], dirichletParamCandidates) gammaSamplesNew[beta_idx] = np.transpose( gammaSamplesNew[beta_idx].T / np.sum(gammaSamplesNew[beta_idx], axis=1)) logThetaWeightsNew[beta_idx] = 0. normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples else: muSamplesNew[beta_idx] = muSamples[ances] gammaSamplesNew[beta_idx] = gammaSamples[ances] nuSamplesNew[beta_idx] = nuSamples[ances] logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx] # task set probability sum_tsProbability[:] = 0. for ts_idx in range(K): tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum( (currentStateSamples == ts_idx), axis=2), axis=1) sum_tsProbability += tsProbability[:, ts_idx] tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability) # Compute action likelihood sum_actionLik[:] = 0. for action_idx in range(numberOfActions): actionLikelihood[:, action_idx] = np.exp( np.log( np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] == action_idx], axis=1)) * beta_softmax) sum_actionLik += actionLikelihood[:, action_idx] rewards[T] = td['reward'][T] actions[T] = td['A_chosen'][T] actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik) betaWeights[:] = actionLikelihood[:, actions[T].astype(int)] filt_actionLkd[T] = actionLikelihood log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples) betaWeights = betaWeights / sum(betaWeights) betaAncestors[:] = useful_functions.stratified_resampling(betaWeights) # update particles muSamples[:] = muSamplesNew gammaSamples[:] = gammaSamplesNew nuSamples[:] = nuSamplesNew logThetaWeights[:] = logThetaWeightsNew[betaAncestors] ancestorTauSamples[:] = currentTauSamples ancestorStateSamples[:] = currentStateSamples elapsed_time = time.time() - start_time_multi return log_proba_, filt_actionLkd
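# --- Illustration: stratified resampling (not part of the original code) ---
# Both the inner state particles and the outer beta particles above are resampled with
# useful_functions.stratified_resampling. The sketch below is one standard way to implement
# stratified resampling and is only an assumption about what that helper does; the project's
# own version may differ.
import numpy as np


def stratified_resampling_sketch(normalised_weights, rng=None):
    """Return ancestor indexes drawn by stratified resampling from normalised weights."""
    rng = np.random.default_rng() if rng is None else rng
    n = len(normalised_weights)
    u = (np.arange(n) + rng.random(n)) / n      # one uniform draw per stratum [i/n, (i+1)/n)
    cumulative = np.cumsum(normalised_weights)
    cumulative[-1] = 1.                         # guard against floating-point rounding
    return np.searchsorted(cumulative, u)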
def SMC2(td, show_progress=True, numberOfStateSamples=1000, numberOfThetaSamples=1000, coefficient = .5, beta_softmax=None): print('Varying Volatility Model'); print('\n') #Start timer start_time_multi = time.time() # Extract parameters from task description stimuli = td['S'] # Sequence of Stimuli Z_true = td['Z'] # Sequence of Task Sets numberOfActions = td['action_num'] # Number of Actions possible numberOfStimuli = td['state_num'] # Number of states or stimuli K = np.prod(np.arange(numberOfActions+1)[-numberOfStimuli:]) # Number of possible Task Sets numberOfTrials = len(Z_true) # Number of Trials # Sampling and prior settings betaPrior = np.array([1, 1]) # Prior on Beta, the feedback noise parameter nuPrior = np.array([3, 1e-3]) # Prior on Nu, the variance on the projected gaussian random walk gammaPrior = numpy.ones(K) # Prior on Gamma, the Dirichlet parameter try: tauDefault = td['tau'][0] except: tauDefault = td['tau'] # Mapping from task set to correct action per stimulus mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(state_num=numberOfStimuli, action_num=numberOfActions).T # Probabilities of every actions updated at every time step -> Used to take the decision actionLikelihood = np.zeros(numberOfActions) # For 1 observation, likelihood of the action. Requires a marginalisation over all task sets actions = np.zeros(numberOfTrials) - 1 rewards = np.zeros(numberOfTrials, dtype=bool) # Keep track of probability correct/exploration after switches countPerformance = np.zeros(numberOfTrials) # Number of correct actions after i trials countExploration = np.zeros(numberOfTrials) # Number of exploratory actions after i trials correct_before_switch = np.empty(0) # The correct task set before switch tsProbability = np.zeros([numberOfTrials, K]) volTracking = np.zeros(numberOfTrials) # Volatility with time volStdTracking = np.zeros(numberOfTrials) nuTracking = np.zeros(numberOfTrials) nuStdTracking = np.zeros(numberOfTrials) betaTracking = np.zeros(numberOfTrials) betaStdTracking = np.zeros(numberOfTrials) acceptanceProba = 0. # Acceptance proba acceptance_list = [1.] 
time_list = [start_time_multi] # SMC particles initialisation betaSamples = np.random.beta(betaPrior[0], betaPrior[1], numberOfThetaSamples) nuSamples = useful_functions.sample_inv_gamma(nuPrior[0], nuPrior[1], numberOfThetaSamples) gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples) logThetaWeights = np.zeros(numberOfThetaSamples) logThetaLks = np.zeros(numberOfThetaSamples) currentStateSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) currentTauSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.double) ancestorStateSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) ancestorTauSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.double) ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples])/numberOfStateSamples unnormalisedAncestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples]) essList = np.zeros(numberOfTrials) tasksetLikelihood = np.zeros(K) # Guided SMC variables betaSamplesNew = np.zeros(numberOfThetaSamples) nuSamplesNew = np.zeros(numberOfThetaSamples) gammaSamplesNew = np.zeros([numberOfThetaSamples, K]) stateSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) tauSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples]) weightsSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples]) logThetaLksNew = np.zeros(numberOfThetaSamples) dirichletParamCandidates = np.zeros(K) stateSamplesCandidates = np.zeros(numberOfStateSamples, dtype=np.intc) tauSamplesCandidates = np.zeros(numberOfStateSamples, dtype =np.double) weightsSamplesCandidates = np.zeros(numberOfStateSamples) idxTrajectories = np.zeros(numberOfThetaSamples) # Plot progress if show_progress : plt.figure(figsize=(12,9)); plt.ion(); # Loop over trials for T in range(numberOfTrials): # Print progress if (T+1) % 10 == 0 : sys.stdout.write(' ' + str(T+1));sys.stdout.flush(); time_list.append(time.time() - start_time_multi); if (T+1) % 100 == 0: print ('\n') if T > 0: # Update theta weights smc_c.guidedUpdateStep_c(logThetaLks, logThetaWeights, np.ascontiguousarray(currentStateSamples), currentTauSamples, gammaSamples, betaSamples/2. + 1/2., nuSamples, tauDefault, T, np.ascontiguousarray(ancestorStateSamples),\ ancestorTauSamples, ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T-2], stimuli[T-1], rewards[T-1], actions[T-1]) ancestorTauSamples = np.array(currentTauSamples) ancestorStateSamples = np.array(currentStateSamples) # Degeneray criterion logEss = 2 * useful_functions.log_sum(logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights) essList[T] = np.exp(logEss) # Move step normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights) if (essList[T] < coefficient * numberOfThetaSamples) and (acceptance_list[-1] > 0.05): acceptanceProba = 0. 
betaMu = np.sum(normalisedThetaWeights*betaSamples) betaVar = np.sum(normalisedThetaWeights * (betaSamples - betaMu)**2) betaAlpha = ((1 - betaMu)/betaVar - 1/betaMu) * betaMu**2 betaBeta = betaAlpha * (1/betaMu - 1) assert(betaAlpha > 0); assert(betaBeta > 0) nuMu = np.sum(normalisedThetaWeights * nuSamples) nuVar = np.sum(normalisedThetaWeights * (nuSamples - nuMu)**2) nuAlpha = nuMu**2/nuVar + 2 nuBeta = nuMu * (nuAlpha - 1) assert(nuAlpha > 0); assert(nuBeta > 0) dirichletMeans = np.sum(normalisedThetaWeights*gammaSamples.T, axis=1) dirichletVar = np.sum(normalisedThetaWeights*(gammaSamples**2).T, axis=1) - dirichletMeans**2 dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2)/(np.sum(dirichletVar)) - 1 dirichletParamCandidates = dirichletMeans * dirichletPrecision assert((dirichletParamCandidates>0).all()) idxTrajectories = useful_functions.stratified_resampling(normalisedThetaWeights) for theta_idx in range(numberOfThetaSamples): nuCandidate = useful_functions.sample_inv_gamma(nuAlpha, nuBeta) betaCandidate = np.random.beta(betaAlpha, betaBeta) gammaCandidate = np.random.dirichlet(dirichletParamCandidates) # Launch guidedSMC logLksCandidate = smc_c.guidedSmc_c(np.ascontiguousarray(stateSamplesCandidates), tauSamplesCandidates, weightsSamplesCandidates, gammaCandidate, betaCandidate/2. + 1/2., nuCandidate, tauDefault, np.ascontiguousarray(mapping), \ np.ascontiguousarray(stimuli[:T], dtype=np.intc), np.ascontiguousarray(rewards[:T], dtype=np.intc), np.ascontiguousarray(actions[:T], dtype=np.intc), numberOfStateSamples) # Update a trajectory idx_traj = idxTrajectories[theta_idx] priorsLogRatio = useful_functions.log_invgamma_pdf(nuCandidate, nuPrior[0], nuPrior[1]) + useful_functions.log_dirichlet_pdf(gammaCandidate, gammaPrior) - \ useful_functions.log_invgamma_pdf(nuSamples[idx_traj], nuPrior[0], nuPrior[1]) - useful_functions.log_dirichlet_pdf(gammaSamples[idx_traj], gammaPrior) transLogRatio = useful_functions.log_invgamma_pdf(nuSamples[idx_traj], nuAlpha, nuBeta) + useful_functions.log_beta_pdf(betaSamples[idx_traj], betaAlpha, betaBeta) + useful_functions.log_dirichlet_pdf(gammaSamples[idx_traj], dirichletParamCandidates) - \ useful_functions.log_invgamma_pdf(nuCandidate, nuAlpha, nuBeta) - useful_functions.log_beta_pdf(betaCandidate, betaAlpha, betaBeta) - useful_functions.log_dirichlet_pdf(gammaCandidate, dirichletParamCandidates) logLkdRatio = logLksCandidate - logThetaLks[idx_traj] logAlpha = min(0, priorsLogRatio + transLogRatio + logLkdRatio) U = np.random.rand() # Accept or Reject if np.log(U) < logAlpha: acceptanceProba += 1. 
betaSamplesNew[theta_idx] = betaCandidate nuSamplesNew[theta_idx] = nuCandidate gammaSamplesNew[theta_idx] = gammaCandidate stateSamplesNew[theta_idx] = stateSamplesCandidates tauSamplesNew[theta_idx] = tauSamplesCandidates weightsSamplesNew[theta_idx] = weightsSamplesCandidates logThetaLksNew[theta_idx] = logLksCandidate else: betaSamplesNew[theta_idx] = betaSamples[idx_traj] nuSamplesNew[theta_idx] = nuSamples[idx_traj] gammaSamplesNew[theta_idx] = gammaSamples[idx_traj] stateSamplesNew[theta_idx] = ancestorStateSamples[idx_traj] tauSamplesNew[theta_idx] = ancestorTauSamples[idx_traj] weightsSamplesNew[theta_idx] = ancestorsWeights[idx_traj] logThetaLksNew[theta_idx] = logThetaLks[idx_traj] print ('\n') print ('acceptance ratio is ') print (acceptanceProba/numberOfThetaSamples) print ('\n') acceptance_list.append(acceptanceProba/numberOfThetaSamples) ancestorsWeights = np.array(weightsSamplesNew) logThetaLks = np.array(logThetaLksNew) logThetaWeights = np.zeros(numberOfThetaSamples) ancestorStateSamples = np.array(stateSamplesNew) ancestorTauSamples = np.array(tauSamplesNew) betaSamples = np.array(betaSamplesNew) nuSamples = np.array(nuSamplesNew) gammaSamples = np.array(gammaSamplesNew) normalisedThetaWeights = useful_functions.to_normalized_weights(logThetaWeights) # Launch bootstrap update smc_c.bootstrapUpdateStep_c(currentStateSamples, currentTauSamples, gammaSamples, betaSamples/2. + 1./2, nuSamples, tauDefault, T, \ np.ascontiguousarray(ancestorStateSamples, dtype=np.intc), ancestorTauSamples, ancestorsWeights, np.ascontiguousarray(mapping), stimuli[T-1]) # Take decision for ts_idx in range(K): tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum((currentStateSamples == ts_idx), axis = 1)) # Todo : change!!! take out currentAncestorsWeights if beta_softmax is None: # Compute action likelihood for action_idx in range(numberOfActions): actionLikelihood[action_idx] = np.sum(tsProbability[T, mapping[stimuli[T]] == action_idx]) # Select action actions[T] = np.argmax(actionLikelihood) else: # Compute action likelihood tsProbability[T] /= sum(tsProbability[T]) for action_idx in range(numberOfActions): actionLikelihood[action_idx] = np.exp(np.log(np.sum(tsProbability[T, mapping[stimuli[T].astype(int)] == action_idx])) * beta_softmax) actionLikelihood /= sum(actionLikelihood) # Select action actions[T] = np.where(np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0] # Select action and compute vol, nu, beta for tracking volTracking[T] = np.sum(normalisedThetaWeights * (np.sum(currentTauSamples, axis=1)/numberOfStateSamples)) volStdTracking[T] = np.sum(normalisedThetaWeights * (np.sum(currentTauSamples**2, axis=1)/numberOfStateSamples)) - volTracking[T]**2 nuTracking[T] = np.sum(normalisedThetaWeights * nuSamples) nuStdTracking[T] = np.sum(normalisedThetaWeights * (nuSamples - nuTracking[T])**2) betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples) betaStdTracking[T] = np.sum(normalisedThetaWeights * (betaSamples - betaTracking[T])**2) # Update performance if K == 2: assert(mapping[stimuli[T].astype(int), Z_true[T].astype(int)] == Z_true[T]) if (K == 2) and (actions[T] == mapping[stimuli[T].astype(int), Z_true[T].astype(int)]): rewards[T] = not td['trap'][T] countPerformance[T:] += 1 elif (K == 24) and (actions[T] == td['A_correct'][T]): rewards[T] = not td['trap'][T] countPerformance[T:] += 1 else: rewards[T] = td['trap'][T] if show_progress: plt.subplot(3,2,1) plt.imshow(tsProbability[:T].T, aspect='auto'); plt.hold(True) plt.plot(Z_true[:T], 'w--') 
plt.axis([0, T-1, 0, K-1]) plt.hold(False) plt.xlabel('trials') plt.ylabel('p(TS|past) at current time') plt.subplot(3,2,2) plt.plot(volTracking[:T], 'b'); plt.hold(True) plt.fill_between(np.arange(T),volTracking[:T]-volStdTracking[:T], volTracking[:T]+volStdTracking[:T],facecolor=[.5,.5,1], color=[.5,.5,1]); plt.plot(td['tau'], 'b--', linewidth=2) plt.axis([0, T-1, 0, .5]) plt.hold(False) plt.xlabel('trials') plt.ylabel('Volatility') plt.subplot(3,2,3) x = np.linspace(0.01,.99,100) plt.plot(x, normlib.pdf(x, nuTracking[T], nuStdTracking[T]), 'b'); plt.hold(True) plt.plot([nuTracking[T], nuTracking[T]], plt.gca().get_ylim(),'b', linewidth=2) plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]), 'r') plt.plot([betaTracking[T], betaTracking[T]], plt.gca().get_ylim(),'r', linewidth=2) plt.plot([td['beta'], td['beta']], plt.gca().get_ylim(), 'r--', linewidth=2) plt.hold(False) plt.xlabel('Parameters') plt.ylabel('Gaussian pdf') plt.subplot(3,2,4) plt.plot(np.arange(T)+1, essList[:T], 'g', linewidth=2); plt.hold(True) plt.plot(plt.gca().get_xlim(), [coefficient*numberOfThetaSamples,coefficient*numberOfThetaSamples], 'g--', linewidth=2); plt.axis([0,T-1,0,numberOfThetaSamples]); plt.hold(False); plt.xlabel('trials'); plt.ylabel('ESS'); plt.subplot(3,2,5); plt.plot(np.divide(countPerformance[:T], np.arange(T)+1), 'k--', linewidth=2); plt.hold(True) plt.axis([0,T-1,0,1]); plt.hold(False); plt.xlabel('Trials'); plt.ylabel('Performance'); plt.draw() plt.show() plt.pause(0.1) elapsed_time = time.time() - start_time_multi return [td, nuSamples, nuTracking, nuStdTracking, volTracking, volTracking, betaSamples, betaTracking, betaStdTracking, gammaSamples, tsProbability, countPerformance, actions, acceptance_list, essList, time_list, elapsed_time]
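# --- Illustration: moment-matched move-step proposals (not part of the original code) ---
# In the move step above, the Metropolis-Hastings proposals are obtained by moment-matching
# the weighted particle cloud: a Beta proposal for the feedback-noise parameter
# (betaAlpha, betaBeta) and an inverse-gamma proposal for nu (nuAlpha, nuBeta). The helpers
# below restate those two method-of-moments rules; the function names are illustrative only.


def beta_from_moments(mu, var):
    """Beta(alpha, beta) whose mean is mu (0 < mu < 1) and variance is var."""
    alpha = ((1. - mu) / var - 1. / mu) * mu ** 2
    beta = alpha * (1. / mu - 1.)
    return alpha, beta


def inv_gamma_from_moments(mu, var):
    """Inverse-gamma(alpha, beta) whose mean is mu and variance is var."""
    alpha = mu ** 2 / var + 2.
    beta = mu * (alpha - 1.)
    return alpha, beta


# The weighted moments come from the normalised theta weights, e.g.
#   betaMu  = np.sum(normalisedThetaWeights * betaSamples)
#   betaVar = np.sum(normalisedThetaWeights * (betaSamples - betaMu) ** 2)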
def smc2(actions, rewards, idx_blocks, choices, subj_idx, show_progress, apply_rep, apply_weber, beta_softmax, temperature, observational_noise): assert (2 not in actions) assert (0 in actions) assert (1 in actions) assert (apply_rep == 0 or apply_rep == 1) assert (apply_weber == 0 or apply_weber == 1) # Extract parameters from task description actions = np.asarray(actions, dtype=np.intc) rewards = np.ascontiguousarray(rewards) idx_blocks = np.asarray(idx_blocks, dtype=np.intc) N_samples = 1000 n_theta = 1000 coefficient = .5 T = actions.shape[0] prev_action = -1 upp_bound_eta = 10. if apply_rep: n_param = 5 else: n_param = 4 if apply_weber == 1: upp_bound_eps = 1. else: upp_bound_eps = .5 # samples samples = np.random.rand(n_theta, n_param) if beta_softmax > 0: temperature = False samples[:, 2] = beta_softmax sample_beta = False upp_bound_beta = beta_softmax else: if temperature: upp_bound_beta = np.sqrt(6) / (np.pi * 5) else: upp_bound_beta = 2. samples[:, 2] = np.random.rand(n_theta) * upp_bound_beta sample_beta = True samples[:, 3] = np.random.rand(n_theta) * upp_bound_eps if apply_rep: samples[:, 4] = (2 * np.random.rand(n_theta) - 1) * upp_bound_eta # variable memory noisy_descendants = np.zeros([n_theta, N_samples, 2]) noisy_ancestors = np.zeros([n_theta, N_samples, 2]) weights_norm = np.zeros([n_theta, N_samples]) log_weights_a = np.zeros([n_theta]) ancestorsIndexes = np.ascontiguousarray(np.zeros(n_theta, dtype=np.intc)) logThetaWeights = np.zeros(n_theta) logThetalkd = np.zeros(n_theta) log_lkd = np.zeros(n_theta) essList = np.zeros(T) acceptance_list = [] marg_loglkd = 0 #move step variables ancestors_indexes_p = np.ascontiguousarray( np.zeros(N_samples, dtype=np.intc)) samples_new = np.zeros([n_theta, n_param]) weights_new = np.zeros([n_theta, N_samples]) states_new = np.zeros([n_theta, N_samples, 2]) logThetalkd_new = np.zeros(n_theta) state_candidates = np.zeros([N_samples, 2]) state_candidates_a = np.zeros([N_samples, 2]) weights_candidates = np.zeros(N_samples) # history of samples noisy_history = np.zeros([T, 2]) if show_progress: plt.figure(figsize=(15, 9)) plt.suptitle("noisy rl", fontsize=14) plt.ion() for t_idx in range(T): # Print progress if (t_idx + 1) % 10 == 0: sys.stdout.write(' ' + str(t_idx + 1)) sys.stdout.flush() print ' marg_loglkd ' + str(marg_loglkd) prev_rew = np.ascontiguousarray(rewards[:, max(0, t_idx - 1)]) log_weights_a[:] = logThetaWeights if t_idx > 0 and choices[t_idx - 1]: assert (actions[max(0, t_idx - 1)] == prev_action) smc_c.smc_update_2q_c(log_lkd, logThetalkd, noisy_descendants, noisy_ancestors, weights_norm, logThetaWeights, ancestorsIndexes, samples, \ idx_blocks, choices, prev_action, actions, prev_rew, t_idx, apply_rep, apply_weber, 2, temperature, observational_noise) # save and update marg_loglkd += logsumexp(log_weights_a + log_lkd) - logsumexp(log_weights_a) normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights) noisy_history[t_idx] = np.sum((normalisedThetaWeights * np.sum( np.transpose(weights_norm * noisy_descendants.T), axis=1).T), axis=1) # Degeneray criterion logEss = 2 * uf.log_sum(logThetaWeights) - uf.log_sum( 2 * logThetaWeights) essList[t_idx] = np.exp(logEss) # update repetition action if choices[t_idx] == 1: prev_action = actions[t_idx] # Move step if (essList[t_idx] < coefficient * n_theta): acceptance_proba = 0 if not sample_beta: samples_tmp = np.delete(samples, 2, axis=1) mu_p = np.sum(samples_tmp.T * normalisedThetaWeights, axis=1) Sigma_p = np.dot( (samples_tmp - mu_p).T * normalisedThetaWeights, 
(samples_tmp - mu_p)) else: mu_p = np.sum(samples.T * normalisedThetaWeights, axis=1) Sigma_p = np.dot((samples - mu_p).T * normalisedThetaWeights, (samples - mu_p)) ancestorsIndexes[:] = uf.stratified_resampling( normalisedThetaWeights) for theta_idx in range(n_theta): idx_traj = ancestorsIndexes[theta_idx] while True: sample_cand = np.array(samples[idx_traj]) sample_p = multi_norm(mu_p, Sigma_p) sample_p_copy = np.array(sample_p) if (not sample_beta) and apply_rep: sample_p = np.array([ sample_p[0], sample_p[1], beta_softmax, sample_p[2], sample_p[3] ]) sample_cand = np.delete(sample_cand, 2) elif not sample_beta: sample_p = np.array([ sample_p[0], sample_p[1], beta_softmax, sample_p[2] ]) sample_cand = np.delete(sample_cand, 2) if apply_rep: if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1. and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta \ and sample_p[3] > 0 and sample_p[3] < upp_bound_eps and sample_p[4] > -upp_bound_eta and sample_p[4] < upp_bound_eta: break else: if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1. and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta \ and sample_p[3] > 0 and sample_p[3] < upp_bound_eps: break # Launch SMC logmarglkd_p = smc_c.smc_2q_c(state_candidates, state_candidates_a, weights_candidates, sample_p, ancestors_indexes_p, \ idx_blocks, actions, rewards, choices, t_idx + 1, apply_rep, apply_weber, 2, temperature, observational_noise) logAlpha = np.minimum(0, logmarglkd_p - logThetalkd[idx_traj] \ + get_logtruncnorm(sample_cand, mu_p, Sigma_p) - get_logtruncnorm(sample_p_copy, mu_p, Sigma_p) ) # accept or reject if np.log(np.random.rand()) < logAlpha: acceptance_proba += 1. samples_new[theta_idx] = sample_p weights_new[theta_idx] = weights_candidates states_new[theta_idx] = state_candidates logThetalkd_new[theta_idx] = logmarglkd_p else: samples_new[theta_idx] = samples[idx_traj] weights_new[theta_idx] = weights_norm[idx_traj] states_new[theta_idx] = noisy_descendants[idx_traj] logThetalkd_new[theta_idx] = logThetalkd[idx_traj] print('\n') print('acceptance ratio is ') print(acceptance_proba / n_theta) print('\n') acceptance_list.append(acceptance_proba / n_theta) weights_norm[:] = weights_new logThetalkd[:] = logThetalkd_new logThetaWeights[:] = np.zeros(n_theta) noisy_descendants[:] = states_new samples[:] = samples_new normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights) if show_progress and t_idx % 10: plt.subplot(3, 2, 1) plt.plot(range(t_idx), noisy_history[:t_idx, 0], 'r') plt.hold(True) plt.plot(range(t_idx), noisy_history[:t_idx, 1], 'b') plt.hold(False) plt.xlabel('trials') plt.ylabel('Q-value 0 (red), and 1 (blue)') plt.subplot(3, 2, 4) plt.plot(range(t_idx), essList[:t_idx], 'b', linewidth=2) plt.hold(True) plt.plot(plt.gca().get_xlim(), [n_theta / 2, n_theta / 2], 'b--', linewidth=2) plt.axis([0, t_idx - 1, 0, n_theta]) # For speed plt.hold(False) plt.xlabel('trials') plt.ylabel('ess') if temperature: mean_beta = np.sum(normalisedThetaWeights * (1. / samples[:, 2])) std_beta = np.sqrt( np.sum(normalisedThetaWeights * (1. 
/ samples[:, 2])**2) - mean_beta**2) x = np.linspace(0., 200, 5000) else: mean_beta = np.sum(normalisedThetaWeights * (10**samples[:, 2])) std_beta = np.sqrt( np.sum(normalisedThetaWeights * (10**samples[:, 2])**2) - mean_beta**2) x = np.linspace(0., 10**upp_bound_beta, 5000) plt.subplot(3, 2, 3) plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g') plt.hold(True) plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2) plt.hold(False) plt.xlabel('beta softmax') plt.ylabel('pdf') mean_alpha_0 = np.sum(normalisedThetaWeights * samples[:, 0]) std_alpha_0 = np.sqrt( np.sum(normalisedThetaWeights * samples[:, 0]**2) - mean_alpha_0**2) mean_alpha_1 = np.sum(normalisedThetaWeights * samples[:, 1]) std_alpha_1 = np.sqrt( np.sum(normalisedThetaWeights * samples[:, 1]**2) - mean_alpha_1**2) plt.subplot(3, 2, 2) x = np.linspace(0., 1., 5000) plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm') plt.hold(True) plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm') plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c') plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c') plt.hold(False) plt.xlabel('learning rates') plt.ylabel('pdf') mean_epsilon = np.sum(normalisedThetaWeights * samples[:, 3]) std_epsilon = np.sqrt( np.sum(normalisedThetaWeights * samples[:, 3]**2) - mean_epsilon**2) plt.subplot(3, 2, 6) x = np.linspace(0., upp_bound_eps, 5000) if apply_rep == 1: mean_rep = np.sum(normalisedThetaWeights * samples[:, 4]) std_rep = np.sqrt( np.sum(normalisedThetaWeights * samples[:, 4]**2) - mean_rep**2) x = np.linspace(-2., 2., 5000) plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'y') plt.hold(True) plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(), 'y', linewidth=2) plt.plot(x, norm.pdf(x, mean_epsilon, std_epsilon), 'g') plt.hold(True) plt.plot([mean_epsilon, mean_epsilon], plt.gca().get_ylim(), 'g', linewidth=2) plt.hold(False) plt.xlabel('epsilon std (green), rep_bias (yellow)') plt.ylabel('pdf') plt.draw() plt.show() plt.pause(0.05) return [ samples, noisy_history, acceptance_list, normalisedThetaWeights, logThetalkd, marg_loglkd ]
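# --- Illustration: particle-marginal Metropolis-Hastings move (not part of the original code) ---
# The move step above refreshes each theta particle by (i) drawing a candidate from a Gaussian
# fitted to the weighted cloud (redrawn until it falls inside the parameter bounds), (ii) running
# an inner SMC to obtain its marginal log-likelihood, and (iii) accepting with probability
# min(1, likelihood ratio * proposal ratio). A simplified sketch under these assumptions;
# run_inner_smc and log_proposal_pdf are hypothetical stand-ins for smc_c.smc_2q_c and
# get_logtruncnorm, and the bound-checking rejection loop is omitted.
import numpy as np


def mh_move_sketch(theta_old, loglkd_old, mu_p, Sigma_p, run_inner_smc, log_proposal_pdf, rng):
    """Return (theta, loglkd, accepted) after one MH move on a resampled trajectory."""
    theta_prop = rng.multivariate_normal(mu_p, Sigma_p)
    loglkd_prop = run_inner_smc(theta_prop)
    log_alpha = min(0., loglkd_prop - loglkd_old
                    + log_proposal_pdf(theta_old, mu_p, Sigma_p)
                    - log_proposal_pdf(theta_prop, mu_p, Sigma_p))
    if np.log(rng.random()) < log_alpha:
        return theta_prop, loglkd_prop, True
    return theta_old, loglkd_old, False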
def ibis(actions, rewards, choices, idx_blocks, subj_idx, apply_rep_bias, apply_weber_decision_noise, curiosity_bias, show_progress, temperature): assert (2 not in actions) assert (0 in actions) assert (1 in actions) actions = np.asarray(actions, dtype=np.intc) rewards = np.ascontiguousarray(rewards) choices = np.asarray(choices, dtype=np.intc) idx_blocks = np.asarray(idx_blocks, dtype=np.intc) nb_samples = 1000 T = actions.shape[0] upp_bound_eta = 10. # sample initialisation if (apply_rep_bias or curiosity_bias) and apply_weber_decision_noise == 0: samples = np.random.rand(nb_samples, 4) if temperature: upp_bound_beta = np.sqrt(6) / (np.pi * 5) else: upp_bound_beta = 2. samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta samples[:, 3] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.) elif apply_weber_decision_noise == 0: samples = np.random.rand(nb_samples, 3) if temperature: upp_bound_beta = np.sqrt(6) / (np.pi * 5) else: upp_bound_beta = 2. samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta elif apply_weber_decision_noise == 1: if apply_rep_bias: samples = np.random.rand(nb_samples, 5) if temperature: upp_bound_beta = np.sqrt(6) / (np.pi * 5) else: upp_bound_beta = 2. samples[:, 4] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.) else: samples = np.random.rand(nb_samples, 4) if temperature: upp_bound_beta = np.sqrt(6) / (np.pi * 5) else: upp_bound_beta = 2. upp_bound_k = 10 samples[:, 2] = np.random.rand( nb_samples) * upp_bound_beta # bound on the beta samples[:, 3] = np.random.rand(nb_samples) * upp_bound_k Q_samples = np.zeros([nb_samples, 2]) prev_action = np.zeros(nb_samples) - 1 # ibis param esslist = np.zeros(T) log_weights = np.zeros(nb_samples) weights_a = np.zeros(nb_samples) p_loglkd = np.zeros(nb_samples) loglkd = np.zeros(nb_samples) marg_loglkd = 0 coefficient = .5 marg_loglkd_l = np.zeros(T) acceptance_l = [] # move step param if apply_rep_bias and apply_weber_decision_noise: move_samples = np.zeros([nb_samples, 5]) elif apply_rep_bias or curiosity_bias: move_samples = np.zeros([nb_samples, 4]) elif apply_weber_decision_noise: move_samples = np.zeros([nb_samples, 4]) else: move_samples = np.zeros([nb_samples, 3]) move_p_loglkd = np.zeros(nb_samples) Q_samples_move = np.zeros([nb_samples, 2]) prev_action_move = np.zeros(nb_samples) mean_Q = np.zeros([T, 2]) prediction_err = np.zeros(nb_samples) prediction_err[:] = -np.inf prediction_err_move = np.zeros(nb_samples) if show_progress: plt.figure(figsize=(15, 9)) plt.suptitle("noiseless rl", fontsize=14) plt.ion() # loop for t_idx in range(T): if (t_idx + 1) % 10 == 0: sys.stdout.write(' ' + str(t_idx + 1) + ' ') print 'marg_loglkd ' + str(marg_loglkd) if (t_idx + 1) % 100 == 0: print('\n') assert (len(np.unique(prev_action)) == 1) # update step weights_a[:] = log_weights if idx_blocks[t_idx]: Q_samples[:] = 0.5 prev_action[:] = -1 # loop over samples for n_idx in range(nb_samples): alpha_c = samples[n_idx, 0] alpha_u = samples[n_idx, 1] if temperature: beta = 1. / samples[n_idx, 2] else: beta = 10**samples[n_idx, 2] if apply_rep_bias or curiosity_bias: eta = samples[n_idx, -1] if apply_weber_decision_noise: k_beta = samples[n_idx, 3] # reweighting if choices[t_idx] == 1 and prev_action[n_idx] != -1 and ( apply_rep_bias == 1 or curiosity_bias) and apply_weber_decision_noise == 0: if apply_rep_bias: value = 1. / ( 1. 
+ np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) - np.sign(prev_action[n_idx] - .5) * eta)) loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx]))) prev_action[n_idx] = actions[t_idx] elif curiosity_bias: try: count_samples = t_idx - 1 - np.where( actions[:t_idx] != actions[t_idx - 1])[0][-1] except: count_samples = t_idx assert (count_samples > 0) value = 1. / (1. + np.exp( beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) + np.sign(prev_action[n_idx] - .5) * eta * count_samples) ) loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx]))) prev_action[n_idx] = actions[t_idx] elif choices[t_idx] == 1 and apply_weber_decision_noise == 0: value = 1. / ( 1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]))) loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx]))) prev_action[n_idx] = actions[t_idx] elif choices[ t_idx] == 1 and apply_weber_decision_noise == 1 and apply_rep_bias == 0: beta_modified = beta / (1. + k_beta * prediction_err[n_idx]) value = 1. / ( 1. + np.exp(beta_modified * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]))) loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx]))) prev_action[n_idx] = actions[t_idx] elif choices[ t_idx] == 1 and apply_weber_decision_noise == 1 and apply_rep_bias == 1: beta_modified = beta / (1. + k_beta * prediction_err[n_idx]) value = 1. / ( 1. + np.exp(beta_modified * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) - np.sign(prev_action[n_idx] - .5) * eta)) loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx]))) prev_action[n_idx] = actions[t_idx] else: value = 1. loglkd[n_idx] = 0. if np.isnan(loglkd[n_idx]): print t_idx print n_idx print beta print value raise Exception p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx] log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx] # update step if actions[t_idx] == 0: prediction_err[n_idx] = np.abs(Q_samples[n_idx, 0] - rewards[0, t_idx]) Q_samples[n_idx, 0] = (1 - alpha_c) * Q_samples[ n_idx, 0] + alpha_c * rewards[0, t_idx] if not curiosity_bias: Q_samples[n_idx, 1] = (1 - alpha_u) * Q_samples[ n_idx, 1] + alpha_u * rewards[1, t_idx] else: prediction_err[n_idx] = np.abs(Q_samples[n_idx, 1] - rewards[1, t_idx]) if not curiosity_bias: Q_samples[n_idx, 0] = (1 - alpha_u) * Q_samples[ n_idx, 0] + alpha_u * rewards[0, t_idx] Q_samples[n_idx, 1] = (1 - alpha_c) * Q_samples[ n_idx, 1] + alpha_c * rewards[1, t_idx] marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a) marg_loglkd_l[t_idx] = marg_loglkd ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights)) esslist[t_idx] = ess weights_a[:] = uf.to_normalized_weights(log_weights) mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0) # move step if ess < coefficient * nb_samples: idxTrajectories = uf.stratified_resampling(weights_a) mu_p = np.sum(samples.T * weights_a, axis=1) Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p)) nb_acceptance = 0. 
for n_idx in range(nb_samples): idx_traj = idxTrajectories[n_idx] while True: sample_p = multi_norm(mu_p, Sigma_p) if not apply_rep_bias and not apply_weber_decision_noise: if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[ 1] > 0 and sample_p[1] < 1 and sample_p[ 2] > 0 and sample_p[2] <= upp_bound_beta: break elif not apply_rep_bias and apply_weber_decision_noise: if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \ and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] <= upp_bound_k: break elif apply_rep_bias and not apply_weber_decision_noise: if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \ and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > -upp_bound_eta and sample_p[3] < upp_bound_eta: break else: if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \ and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] < upp_bound_k \ and sample_p[-1] > -upp_bound_eta and sample_p[-1] < upp_bound_eta: break [loglkd_prop, Q_prop, prev_action_prop, prediction_err_prop ] = get_loglikelihood(sample_p, rewards, actions, choices, idx_blocks, t_idx + 1, apply_rep_bias, apply_weber_decision_noise, curiosity_bias, temperature) log_ratio = loglkd_prop - p_loglkd[idx_traj] \ + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p) - get_logtruncnorm(sample_p, mu_p, Sigma_p) log_ratio = np.minimum(log_ratio, 0) if (np.log(np.random.rand()) < log_ratio): nb_acceptance += 1. move_samples[n_idx] = sample_p move_p_loglkd[n_idx] = loglkd_prop Q_samples_move[n_idx] = Q_prop prediction_err_move[n_idx] = prediction_err_prop else: move_samples[n_idx] = samples[idx_traj] move_p_loglkd[n_idx] = p_loglkd[idx_traj] Q_samples_move[n_idx] = Q_samples[idx_traj] prediction_err_move[n_idx] = prediction_err[idx_traj] print 'acceptance ratio %s' % str(nb_acceptance / nb_samples) assert (prev_action_prop == prev_action[0]) acceptance_l.append(nb_acceptance / nb_samples) # move samples samples[:] = move_samples p_loglkd[:] = move_p_loglkd log_weights[:] = 0. Q_samples[:] = Q_samples_move prediction_err[:] = prediction_err_move if show_progress and t_idx % 10 == 0: weights_a[:] = uf.to_normalized_weights(log_weights) plt.subplot(3, 2, 1) plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2) plt.hold(False) plt.xlabel('trials') plt.ylabel('Q values') if apply_rep_bias == 1: mean_rep = np.sum(weights_a * samples[:, 3]) std_rep = np.sqrt( np.sum(weights_a * samples[:, 3]**2) - mean_rep**2) plt.subplot(3, 2, 2) x = np.linspace(-2., 2., 5000) plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g') plt.hold(True) plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(), 'g', linewidth=2) plt.hold(False) plt.xlabel('trials') plt.ylabel('rep param') if temperature: mean_beta = np.sum(weights_a * 1. / samples[:, 2]) std_beta = np.sqrt( np.sum(weights_a * ((1. 
/ samples[:, 2])**2)) - mean_beta**2) else: mean_beta = np.sum(weights_a * 10**samples[:, 2]) std_beta = np.sqrt( np.sum(weights_a * ((10**samples[:, 2])**2)) - mean_beta**2) if apply_weber_decision_noise: mean_k = np.sum(weights_a * samples[:, 3]) std_k = np.sqrt( np.sum(weights_a * (samples[:, 3]**2)) - mean_k**2) plt.subplot(3, 2, 3) x = np.linspace(0.01, 200., 5000) plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2) plt.hold(True) plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2) plt.hold(False) plt.xlabel('beta softmax') plt.ylabel('pdf') mean_alpha_0 = np.sum(weights_a * samples[:, 0]) std_alpha_0 = np.sqrt( np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2) mean_alpha_1 = np.sum(weights_a * samples[:, 1]) std_alpha_1 = np.sqrt( np.sum(weights_a * (samples[:, 1]**2)) - mean_alpha_1**2) plt.subplot(3, 2, 4) x = np.linspace(0., 1., 5000) plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm', linewidth=2) plt.hold(True) plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm', linewidth=2) plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c', linewidth=2) plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c', linewidth=2) plt.hold(False) plt.xlabel('learning rate chosen (majenta) an unchosen (cyan)') plt.ylabel('pdf') plt.subplot(3, 2, 5) plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2) plt.hold(True) plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2], 'b--', linewidth=2) plt.axis([0, t_idx - 1, 0, nb_samples]) plt.hold(False) plt.xlabel('trials') plt.ylabel('ess') # modified here add the plot for k plt.subplot(3, 2, 6) x = np.linspace(0.01, 10., 5000) plt.plot(x, norm.pdf(x, mean_k, std_k), 'k', linewidth=2) plt.hold(True) plt.plot([mean_k, mean_k], plt.gca().get_ylim(), 'k', linewidth=2) plt.hold(False) plt.xlabel('scaling parameter for softmax 1/[0 1]') plt.ylabel('pdf') plt.draw() plt.show() plt.pause(0.05) return [ samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd, marg_loglkd_l ]
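# --- Illustration: single-trial likelihood and Q-value update (not part of the original code) ---
# The reweighting loop above scores each particle with a two-action softmax, optionally biased
# towards repeating the previous action, and then applies a delta-rule update with separate
# learning rates for the chosen and unchosen options. The standalone helper below restates that
# single-trial computation (mirroring the non-curiosity, non-Weber branch); the function name
# and argument layout are illustrative only.
import numpy as np


def trial_loglkd_and_update(Q, action, reward_vec, alpha_c, alpha_u, beta, eta=0., prev_action=-1):
    """Return (log p(action), updated Q) for one trial with counterfactual feedback."""
    rep = 0. if prev_action == -1 else np.sign(prev_action - .5) * eta
    p_action_1 = 1. / (1. + np.exp(beta * (Q[0] - Q[1]) - rep))      # p(choose action 1)
    loglkd = action * np.log(p_action_1) + (1 - action) * np.log(1. - p_action_1)
    Q = np.array(Q, dtype=float)
    chosen, unchosen = (0, 1) if action == 0 else (1, 0)
    Q[chosen] = (1 - alpha_c) * Q[chosen] + alpha_c * reward_vec[chosen]
    Q[unchosen] = (1 - alpha_u) * Q[unchosen] + alpha_u * reward_vec[unchosen]
    return loglkd, Q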
def SMC2(td, beta_softmax=1., lambda_noise=.4, eta_noise=.1, epsilon_softmax=0., noise_inertie=0.,
         numberOfStateSamples=200, numberOfThetaSamples=200, numberOfBetaSamples=20,
         coefficient=.5, latin_hyp_sampling=True):

    print('\n')
    print('Noisy Forward Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Uniform distribution used for latin hypercube sampling
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('Sobol sampling')

    # Extract parameters from task description
    stimuli = td['S']                    # Sequence of Stimuli
    Z_true = td['Z']                     # Sequence of Task Sets
    numberOfActions = td['action_num']   # Number of Actions possible
    numberOfStimuli = td['state_num']    # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)         # Number of Trials
    distances = np.zeros([numberOfThetaSamples, 1])

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    gammaPrior = np.ones(K)       # Prior on Gamma, the Dirichlet parameter
    log_proba_ = 0.

    # Verification
    if K == 2:
        if not latin_hyp_sampling:
            raise ValueError('Why did you change latin_hyp_sampling? By default, it is True and has no influence when K=2.')

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(state_num=numberOfStimuli, action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaLog = np.zeros(numberOfBetaSamples)
    logbetaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probabilities of every action, updated at every time step -> used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros([numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)
    dirichletParamCandidates = np.zeros(K)

    # SMC particles initialisation
    muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])  # np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    if K == 24:
        try:
            latin_hyp_samples = pickle.load(open('../../utils/sobol_200_25.pkl', 'rb'))
        except:
            latin_hyp_samples = pickle.load(open('../../models/utils/sobol_200_25.pkl', 'rb'))
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 1)
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaPrior[0], betaPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 1:], gammaPrior)
            gammaSamples[beta_idx] = np.transpose(gammaSamples[beta_idx].T / np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    weightsList = np.ones([numberOfThetaSamples, numberOfStateSamples]) / numberOfStateSamples
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    log_proba_corr = 0.
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)

    # Guided SMC variables
    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])

    temperatures = np.zeros(numberOfBetaSamples)
    temperatureAncestors = np.zeros(numberOfBetaSamples) + .5

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0:
            print('\n')

        for beta_idx in range(numberOfBetaSamples):
            ances = betaAncestors[beta_idx]

            # Bootstrap SMC step over the theta-particles attached to this beta-particle
            temperatures[beta_idx] = smc_c.bootstrap_smc_step_c(logThetaWeights[beta_idx], distances, muSamples[ances]/2. + 1./2, lambda_noise, eta_noise, noise_inertie,
                                                                gammaSamples[ances], currentSamples[beta_idx], ancestorSamples[ances], weightsList,
                                                                np.ascontiguousarray(mapping), stimuli[T-1], rewards[T-1], actions[T-1], T,
                                                                likelihoods, positiveStates, ante_proba_local, post_proba_local, ancestorsIndexes,
                                                                gammaAdaptedProba, sum_weightsList, currentNoises, temperatureAncestors[ances])

            # Move step
            normalisedThetaWeights[beta_idx] = useful_functions.to_normalized_weights(logThetaWeights[beta_idx])
            ess = 1. / np.sum(normalisedThetaWeights[beta_idx]**2)

            if ess < coefficient * numberOfThetaSamples:
                acceptanceProba = 0.
                # Fit a Beta proposal to the weighted mu samples by moment matching
                betaMu = np.sum(normalisedThetaWeights[beta_idx] * muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] * (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)
                # Fit a Dirichlet proposal to the weighted gamma samples
                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] * gammaSamples[ances].T, axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] * (gammaSamples[ances]**2).T, axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (np.sum(dirichletVar)) - 1
                dirichletParamCandidates[:] = np.maximum(dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())

                if K == 2:
                    muSamplesNew[beta_idx] = np.random.beta(betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(dirichletParamCandidates, numberOfThetaSamples)
                if K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 1)
                    muSamplesNew[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 1:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(gammaSamplesNew[beta_idx].T / np.sum(gammaSamplesNew[beta_idx], axis=1))

                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples
            else:
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # Task set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum((currentSamples == ts_idx), axis=2), axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]

        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(np.log(np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] == action_idx], axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik) * (1 - epsilon_softmax) + epsilon_softmax / numberOfActions
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]
        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)
        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # Update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorSamples[:] = currentSamples
        temperatureAncestors[:] = temperatures

    elapsed_time = time.time() - start_time_multi

    return log_proba_
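# ---------------------------------------------------------------------------
# The move steps above build their independent Beta proposal by moment
# matching: the weighted mean and variance of the mu particles are converted
# into Beta(alpha, beta) parameters (betaAlpha / betaBeta), and the same idea
# with a shared precision gives the Dirichlet proposal. The helper below is a
# minimal, self-contained sketch of that conversion, not part of the original
# model code; the particle cloud and its weights are made up for illustration.
def _demo_beta_moment_matching():
    import numpy as np

    rng = np.random.default_rng(0)

    # Hypothetical weighted particle cloud drawn from a known Beta(3, 7)
    particles = rng.beta(3., 7., size=5000)
    weights = np.full(particles.size, 1. / particles.size)  # uniform weights for the demo

    # Weighted first and second central moments
    mu = np.sum(weights * particles)
    var = np.sum(weights * (particles - mu) ** 2)

    # Same method-of-moments formulas as betaAlpha / betaBeta in the move step
    alpha_hat = ((1 - mu) / var - 1 / mu) * mu ** 2
    beta_hat = alpha_hat * (1 / mu - 1)

    return alpha_hat, beta_hat  # should land close to (3, 7)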
def ibis(actions, rewards, tau, subj_idx, apply_rep_bias, show_progress=True, temperature=True, model_id=0):
    '''
    model_id = 0 : 1 alpha, 1 beta
    model_id = 1 : n alpha, 1 beta
    model_id = 2 : n alpha, n beta
    '''
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # Sample initialisation
    if model_id == 2:
        n_alpha = 6
        n_beta = 6
        tau_unique = np.unique(tau)
        x_coor_a = np.array([np.where(tau_unique == t)[0][0] for t in tau])
        x_coor_b = np.array([np.where(tau_unique == t)[0][0] for t in tau]) + n_alpha
    elif model_id == 1:
        n_alpha = 6
        n_beta = 1
        tau_unique = np.unique(tau)
        x_coor_a = np.array([np.where(tau_unique == t)[0][0] for t in tau])
        x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha
    else:
        n_alpha = 1
        n_beta = 1
        x_coor_a = np.zeros(len(tau), dtype=np.int8)
        x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha

    n_theta = n_alpha + n_beta
    if apply_rep_bias:
        n_theta += 1

    samples = np.random.rand(nb_samples, n_theta)
    if temperature:
        upp_bound_beta = .6
    else:
        upp_bound_beta = 2.
    samples[:, n_alpha:(n_beta + n_alpha)] = np.random.rand(nb_samples, n_beta) * upp_bound_beta
    if apply_rep_bias:
        samples[:, -1] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)

    Q_samples = np.zeros([nb_samples, 2]) + .5
    prev_action = np.zeros(nb_samples) - 1

    # ibis parameters
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)
    loglkd = np.zeros(nb_samples)
    marg_loglkd = 0
    coefficient = .5
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # Move step parameters
    move_samples = np.zeros([nb_samples, n_theta])
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    prev_action_move = np.zeros(nb_samples)
    mean_Q = np.zeros([T, 2])

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noiseless rl", fontsize=14)
        plt.ion()

    # Loop over trials
    for t_idx in range(T):

        # Print progress
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1) + ' ')
            print('marg_loglkd ' + str(marg_loglkd))
        if (t_idx + 1) % 100 == 0:
            print('\n')

        assert (len(np.unique(prev_action)) == 1)

        # Update step
        weights_a[:] = log_weights
        for n_idx in range(nb_samples):
            alpha = samples[n_idx, x_coor_a[t_idx]]
            if temperature:
                beta = 1. / samples[n_idx, x_coor_b[t_idx]]
            else:
                beta = 10**samples[n_idx, x_coor_b[t_idx]]
            if apply_rep_bias:
                eta = samples[n_idx, -1]
            if prev_action[n_idx] != -1 and apply_rep_bias:
                value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) - np.sign(prev_action[n_idx] - .5) * eta))
                loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]
            else:
                value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]

            if np.isnan(loglkd[n_idx]):
                print(t_idx)
                print(n_idx)
                print(beta)
                print(value)
                raise Exception

            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]
            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

            # Rescorla-Wagner update of the Q values
            if actions[t_idx] == 0:
                Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * rewards[t_idx]
                Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * (1 - rewards[t_idx])
            else:
                Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * (1 - rewards[t_idx])
                Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * rewards[t_idx]

        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess

        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # Move step
        if ess < coefficient * nb_samples:
            idxTrajectories = uf.stratified_resampling(weights_a)
            mu_p = np.sum(samples.T * weights_a, axis=1)
            Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            nb_acceptance = 0.
            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]
                # Propose until the candidate falls inside the parameter bounds
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    if not apply_rep_bias:
                        if np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) and np.all(sample_p[n_alpha:(n_beta + n_alpha)] > 0) and np.all(sample_p[n_alpha:(n_beta + n_alpha)] <= upp_bound_beta):
                            break
                    else:
                        if np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) and np.all(sample_p[n_alpha:(n_beta + n_alpha)] > 0) and np.all(sample_p[n_alpha:(n_beta + n_alpha)] <= upp_bound_beta) and sample_p[-1] > -upp_bound_eta and sample_p[-1] < upp_bound_eta:
                            break

                [loglkd_prop, Q_prop, prev_action_prop] = get_loglikelihood(sample_p, x_coor_a, x_coor_b, rewards, actions, t_idx + 1, apply_rep_bias, temperature)

                # Metropolis-Hastings acceptance ratio
                log_ratio = loglkd_prop - p_loglkd[idx_traj] \
                    + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p) - get_logtruncnorm(sample_p, mu_p, Sigma_p)
                log_ratio = np.minimum(log_ratio, 0)

                if np.log(np.random.rand()) < log_ratio:
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]

            print('acceptance ratio %s' % str(nb_acceptance / nb_samples))
            assert (prev_action_prop == prev_action[0])
            acceptance_l.append(nb_acceptance / nb_samples)

            # Move samples
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd
            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move

        if show_progress and t_idx % 10 == 0:
            weights_a[:] = uf.to_normalized_weights(log_weights)
            plt.subplot(3, 2, 1)
            plt.cla()  # clear the panel before redrawing (replaces the removed plt.hold behaviour)
            plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
            plt.xlabel('trials')
            plt.ylabel('Q values')
            if apply_rep_bias == 1:
                mean_rep = np.sum(weights_a * samples[:, 2])
                std_rep = np.sqrt(np.sum(weights_a * samples[:, 2]**2) - mean_rep**2)
                plt.subplot(3, 2, 2)
                plt.cla()
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g')
                plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(), 'g', linewidth=2)
                plt.xlabel('trials')
                plt.ylabel('rep param')
            if temperature:
                mean_beta = np.sum(weights_a * 1. / samples[:, 1])
                std_beta = np.sqrt(np.sum(weights_a * ((1. / samples[:, 1])**2)) - mean_beta**2)
            else:
                mean_beta = np.sum(weights_a * 10**samples[:, 1])
                std_beta = np.sqrt(np.sum(weights_a * ((10**samples[:, 1])**2)) - mean_beta**2)
            plt.subplot(3, 2, 3)
            plt.cla()
            x = np.linspace(0.01, 200., 5000)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2)
            plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')
            mean_alpha_0 = np.sum(weights_a * samples[:, 0])
            std_alpha_0 = np.sqrt(np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
            plt.subplot(3, 2, 4)
            plt.cla()
            x = np.linspace(0., 1., 5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm', linewidth=2)
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm', linewidth=2)
            plt.xlabel('learning rate (magenta)')
            plt.ylabel('pdf')
            plt.subplot(3, 2, 5)
            plt.cla()
            plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2], 'b--', linewidth=2)
            plt.axis([0, t_idx - 1, 0, nb_samples])  # For speed
            plt.xlabel('trials')
            plt.ylabel('ess')
            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [samples, Q_samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd, marg_loglkd_l]
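# ---------------------------------------------------------------------------
# ibis() measures weight degeneracy with the effective sample size computed
# directly from the unnormalised log-weights:
#     ESS = (sum_i w_i)^2 / sum_i w_i^2 = exp(2*logsumexp(log w) - logsumexp(2*log w)).
# The helper below is a small self-contained check of that identity, not part
# of the original file; the log-weights are made up for illustration.
def _demo_log_domain_ess():
    import numpy as np
    from scipy.special import logsumexp

    rng = np.random.default_rng(1)
    log_w = rng.normal(size=1000)  # hypothetical unnormalised log-weights

    # Log-domain ESS, as in the update step of ibis()
    ess_log = np.exp(2 * logsumexp(log_w) - logsumexp(2 * log_w))

    # Direct ESS on the normalised weights, for comparison
    w = np.exp(log_w - logsumexp(log_w))
    ess_direct = 1. / np.sum(w ** 2)

    assert np.isclose(ess_log, ess_direct)
    return ess_log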