def model_choice(models, obs):
    """Run model choice for every universe size K in [2, 8] and collect
    per-bin predictiveness statistics.

    For each K this builds Theano shared tensors for the candidate models,
    precomputes their predictiveness profiles, and constructs a choice
    maker according to the module-level INFERENCE flag ('underfit' or
    'bayes').  Every non-empty (N, H) observation bin is then processed in
    batches of BATCH_SIZE; per-observation predictiveness values are
    reduced to moments and stored in a kData structure.

    Args:
        models: indexable collection; models[ki - 2] is the model array for
            universe size ki (first dimension = number of models).
        obs: nested bins; obs[ki - 2][i][j] is either [] or a pair whose
            first element is an observation array and whose second element
            is handed to get_predictiveness_array.

    Returns:
        List of kData objects, one per K.  Each kData is also pickled to
        '%s_k%d.pkl' % (name, ki) as a checkpoint.

    NOTE(review): depends on module globals (INFERENCE, num_profiles,
    num_alpha, num_priors, num_loss_funcs, BATCH_SIZE, name, pValue_alg)
    and project helpers; also, this function appears to be defined more
    than once in this module — only the last definition is live at import
    time.
    """
    k = [i for i in xrange(2, 9)]  # universe sizes K = 2..8
    Statistics = []
    for ki in k:
        print 'K = ', ki
        num_M = models[ki - 2].shape[0]
        print 'Num Models: ', num_M
        numNbins = len(obs[ki - 2])
        numHbins = len(obs[ki - 2][0])
        # Candidate models for this K as a shared tensor in the configured
        # float width.
        M = theano.shared(
            np.asarray(models[ki - 2], dtype=theano.config.floatX))
        ObSym = T.matrix(
        )  # Symbolic tensor for observation batches - indexed elements of Obs shared variable are passed through this
        # Numeric predictiveness table, computed once per K.
        Pred = theano.function(
            [], predictiveness_profiles(M, ki, len(
                models[ki - 2])))()  # This should be dealt with better too...
        Pred_n = Pred  # keep a numpy copy; Pred itself becomes a shared tensor below
        Pred = theano.shared(np.asarray(Pred, dtype=theano.config.floatX))
        # setup inference schemas and theano symbolic tensors
        if INFERENCE == 'underfit':
            profiles = make_agression_profiles(num_profiles, num_alpha)
            #alpha = theano.shared(np.asmatrix(np.linspace(0.0,1.0, num = num_alpha, endpoint = False), dtype=theano.config.floatX))
            Alpha = T.arange(0., 1.0, 1. / num_alpha)  # alpha grid in [0, 1)
            Agression_profiles = T.matrix('Agr')
            nAlpha, nM, nO = T.iscalars('', '', '')
            Choice_Maker = Underfit_Choice(M, ObSym, nM, nO, ki, nAlpha,
                                           Alpha, Agression_profiles, Pred,
                                           pValue_alg)  #only works for 0...
        elif INFERENCE == 'bayes':
            profiles = make_priors_profiles(num_priors, num_M)
            Priors_profiles = T.matrix('Priors')
            Loss_funcs = T.arange(
                1, 5
            )  # Loss functions are choices in bayesian_choice numbered [1,4]
            nM, nO = T.iscalars('', '')
            Choice_Maker = Bayesian_Choice(M, ObSym, nM, nO, ki,
                                           Priors_profiles, Loss_funcs)
        else:
            print 'unknown inference algorithm...'
            quit()
        # all data for this K
        k_Data = kData(numNbins, numHbins, num_profiles)
        for i in xrange(numNbins):
            for j in xrange(numHbins):
                print 'bin ', i, j
                t0 = time.time()
                if obs[ki - 2][i][j] == [] or obs[ki - 2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    continue
                else:
                    num_obs = obs[ki - 2][i][j][0].shape[0]
                    # allocate for predictiveness of model choice vs universe for each obs for each profile
                    k_pred = kPred(num_obs, num_profiles)
                    # NOTE(review): np.float is a deprecated alias (removed
                    # in NumPy >= 1.24); plain float would do here.
                    num_batches = int(np.ceil(num_obs / np.float(BATCH_SIZE)))
                    for batch_index in xrange(num_batches):
                        # Last batch absorbs the remainder of the bin.
                        top = BATCH_SIZE * (batch_index + 1) if batch_index < (
                            num_batches - 1) else num_obs
                        n_obs = top - BATCH_SIZE * (batch_index)
                        print 'batch index ', batch_index, '\t num obs: ', top - BATCH_SIZE * batch_index
                        if INFERENCE == 'underfit':
                            batch_choice = Choice_Maker.Choice_Profile_F(
                                profiles, num_alpha, num_M, n_obs,
                                obs[ki - 2][i][j][0][BATCH_SIZE *
                                                     batch_index:top])
                            print batch_choice
                            for prof in xrange(num_profiles):
                                k_pred[prof][BATCH_SIZE * (
                                    batch_index):top] = get_predictiveness_array(
                                        batch_choice[prof],
                                        obs[ki - 2][i][j][1], Pred_n, n_obs)
                        elif INFERENCE == 'bayes':
                            batch_choice = Choice_Maker.Choice_Profile_F(
                                profiles, num_M, n_obs,
                                obs[ki - 2][i][j][0][BATCH_SIZE *
                                                     batch_index:top])
                            print batch_choice
                            # Profiles are laid out prior-major:
                            # flat index = pr * num_loss_funcs + lf.
                            for pr in xrange(num_priors):
                                for lf in xrange(num_loss_funcs):
                                    k_pred[pr * num_loss_funcs + lf][
                                        BATCH_SIZE *
                                        (batch_index
                                         ):top] = get_predictiveness_array(
                                             batch_choice[pr][lf],
                                             obs[ki - 2][i][j][1], Pred_n,
                                             n_obs)
                    # Reduce per-observation predictiveness to moments for
                    # this bin.
                    for prof in xrange(num_profiles):
                        pred_moments = get_moments(k_pred[prof], num_obs)
                        for m in xrange(len(pred_moments)):
                            k_Data[prof][m][i, j] = pred_moments[m]
                t1 = time.time()
                print 'single bin takes: ', (t1 - t0) / 60., ' minutes'
        Statistics.append(k_Data)
        # Checkpoint this K's statistics to disk.
        f = open('%s_k%d.pkl' % (name, ki), 'wb')
        pickle.dump(k_Data, f)
        f.close()
    return Statistics
def model_choice(models, obs):
    """Batched model-choice driver over universe sizes K = 2..8.

    Builds per-K Theano shared tensors and a predictiveness table, selects
    the inference schema from the module-level INFERENCE flag ('underfit'
    or 'bayes'), then scores every non-empty (N, H) observation bin in
    BATCH_SIZE chunks and stores per-profile moment statistics in a kData
    structure.  Returns the list of kData objects (one per K); each is
    also pickled to '%s_k%d.pkl' % (name, ki).

    NOTE(review): reads many module globals (INFERENCE, num_profiles,
    num_alpha, num_priors, num_loss_funcs, BATCH_SIZE, name, pValue_alg).
    This function appears to be defined more than once in this module;
    only the last definition is live after import.
    """
    k = [i for i in xrange(2, 9)]  # universe sizes K = 2..8
    Statistics = []
    for ki in k:
        print 'K = ', ki
        num_M = models[ki-2].shape[0]
        print 'Num Models: ', num_M
        numNbins = len(obs[ki-2])
        numHbins = len(obs[ki-2][0])
        # candidate models for this K, cast to the configured float width
        M = theano.shared(np.asarray(models[ki-2], dtype = theano.config.floatX))
        ObSym = T.matrix() # Symbolic tensor for observation batches - indexed elements of Obs shared variable are passed through this
        # numeric predictiveness table, computed once per K
        Pred = theano.function([], predictiveness_profiles(M, ki, len(models[ki-2])))() # This should be dealt with better too...
        Pred_n = Pred  # numpy copy; Pred itself becomes a shared tensor below
        Pred = theano.shared(np.asarray(Pred, dtype = theano.config.floatX))
        # setup inference schemas and theano symbolic tensors
        if INFERENCE == 'underfit':
            profiles = make_agression_profiles(num_profiles, num_alpha)
            #alpha = theano.shared(np.asmatrix(np.linspace(0.0,1.0, num = num_alpha, endpoint = False), dtype=theano.config.floatX))
            Alpha = T.arange(0., 1.0, 1./num_alpha)  # alpha grid in [0, 1)
            Agression_profiles = T.matrix('Agr')
            nAlpha, nM, nO = T.iscalars('','','')
            Choice_Maker = Underfit_Choice(M, ObSym, nM, nO, ki, nAlpha, Alpha, Agression_profiles, Pred, pValue_alg) #only works for 0...
        elif INFERENCE == 'bayes':
            profiles = make_priors_profiles(num_priors, num_M)
            Priors_profiles = T.matrix('Priors')
            Loss_funcs = T.arange(1,5) # Loss functions are choices in bayesian_choice numbered [1,4]
            nM, nO = T.iscalars('','')
            Choice_Maker = Bayesian_Choice(M, ObSym, nM, nO, ki, Priors_profiles, Loss_funcs)
        else:
            print 'unknown inference algorithm...'
            quit()
        # all data for this K
        k_Data = kData(numNbins, numHbins, num_profiles)
        for i in xrange(numNbins):
            for j in xrange(numHbins):
                print 'bin ', i, j
                t0 = time.time()
                if obs[ki-2][i][j] == [] or obs[ki-2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    continue
                else:
                    num_obs = obs[ki-2][i][j][0].shape[0]
                    # allocate for predictiveness of model choice vs universe for each obs for each profile
                    k_pred = kPred(num_obs, num_profiles)
                    # NOTE(review): np.float is deprecated (removed in NumPy >= 1.24)
                    num_batches = int(np.ceil(num_obs/np.float(BATCH_SIZE)))
                    for batch_index in xrange(num_batches):
                        # last batch absorbs the remainder of the bin
                        top = BATCH_SIZE*(batch_index+1) if batch_index < (num_batches-1) else num_obs
                        n_obs = top - BATCH_SIZE*(batch_index)
                        print 'batch index ', batch_index, '\t num obs: ', top - BATCH_SIZE*batch_index
                        if INFERENCE == 'underfit':
                            batch_choice = Choice_Maker.Choice_Profile_F(profiles, num_alpha, num_M, n_obs, obs[ki-2][i][j][0][BATCH_SIZE*batch_index:top])
                            print batch_choice
                            for prof in xrange(num_profiles):
                                k_pred[prof][BATCH_SIZE*(batch_index):top] = get_predictiveness_array(batch_choice[prof], obs[ki-2][i][j][1], Pred_n, n_obs)
                        elif INFERENCE == 'bayes':
                            batch_choice = Choice_Maker.Choice_Profile_F(profiles, num_M, n_obs, obs[ki-2][i][j][0][BATCH_SIZE*batch_index:top])
                            print batch_choice
                            # profiles laid out prior-major: index = pr*num_loss_funcs + lf
                            for pr in xrange(num_priors):
                                for lf in xrange(num_loss_funcs):
                                    k_pred[pr*num_loss_funcs + lf][BATCH_SIZE*(batch_index):top] = get_predictiveness_array(batch_choice[pr][lf], obs[ki-2][i][j][1], Pred_n, n_obs)
                    # reduce per-observation predictiveness to moments for this bin
                    for prof in xrange(num_profiles):
                        pred_moments = get_moments(k_pred[prof], num_obs)
                        for m in xrange(len(pred_moments)):
                            k_Data[prof][m][i,j] = pred_moments[m]
                t1 = time.time()
                print 'single bin takes: ',(t1-t0)/60., ' minutes'
        Statistics.append(k_Data)
        # checkpoint this K's statistics to disk
        f = open('%s_k%d.pkl'%(name, ki), 'wb')
        pickle.dump(k_Data, f)
        f.close()
    return Statistics
def model_choice(models, obs):
    """Underfit model choice over universe sizes K = 2..8 with three fixed
    aggression profiles (uniform / agressive / cautious).

    For each K, builds Theano shared tensors for the candidate models and
    their predictiveness table, then evaluates call_underfit_choice_theano
    over every non-empty (N, H) observation bin, chunking observations in
    batches of 1000.  Per-bin moment statistics are appended to Statistics
    and checkpointed to pickle files per (K, i) row and per K.

    Returns the Choices list — NOTE(review): the appends that would fill
    it are commented out, so it is returned as a list of empty lists; the
    computed results live in the pickled Statistics checkpoints.
    """
    k = [i for i in xrange(2, 9)]  # universe sizes K = 2..8
    num_alpha = 10  # resolution of the alpha grid for the aggression profiles
    Choices = []
    Statistics = []
    # Three fixed aggression profiles over the alpha grid.
    Agression_profiles = []
    Agression_profiles.append(theano.shared(np.ones((num_alpha, 1), dtype = theano.config.floatX)/num_alpha)) #uniform
    Agression_profiles.append(theano.shared(np.array([[i*2./num_alpha] for i in xrange(num_alpha)], dtype = theano.config.floatX))) #agressive
    Agression_profiles.append(theano.shared(np.array([[2-i*2./num_alpha] for i in xrange(num_alpha)], dtype = theano.config.floatX))) #cautious
    for ki in k:
        Choices.append([])
        Statistics.append([])
        print 'K = ', ki
        #Agression = theano.shared(np.ones((num_alpha, 1), dtype = theano.config.floatX)/num_alpha)
        num_M = models[ki-2].shape[0]
        M = theano.shared(np.asarray(models[ki-2], dtype = theano.config.floatX))
        result = predictiveness_profiles(M, ki, len(models[ki-2]))
        t0=time.time()
        Pred = theano.function([], result)()  # numeric predictiveness table for this K
        t1=time.time()
        print 'pred took ', t1-t0, ' s'
        numNbins = len(obs[ki-2])
        numHbins = len(obs[ki-2][0])
        for i in xrange(numNbins):
            #Choices[ki-2].append([])
            Statistics[ki-2].append([])
            for j in xrange(numHbins):
                print 'bin ', i, j
                t0 = time.time()
                if obs[ki-2][i][j] == [] or obs[ki-2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    #Choices[ki-2][i].append([])
                    Statistics[ki-2][i].append([])  # keep a placeholder so bin indices line up
                    continue
                else:
                    num_obs = obs[ki-2][i][j][0].shape[0]
                    print 'num obs ', num_obs
                    if num_obs < 1000:
                        # small bin: single evaluation over all observations
                        Obs = theano.shared(np.asarray(obs[ki-2][i][j][0], dtype = theano.config.floatX))
                        choice_bin = call_underfit_choice_theano(M, Obs, num_M, num_obs, ki, num_alpha, Agression_profiles, Pred)
                    else:
                        # large bin: first chunk of 1000, then the remainder in
                        # 1000-observation batches appended below
                        Obs = theano.shared(np.asarray(obs[ki-2][i][j][0][0:1000], dtype = theano.config.floatX))
                        n_obs = len(Obs.get_value())
                        print n_obs
                        choice_bin = call_underfit_choice_theano(M, Obs, num_M, n_obs, ki, num_alpha, Agression_profiles, Pred)
                        for batch_index in xrange(int(np.floor(num_obs/1000))):
                            # last batch absorbs the remainder of the bin
                            top = 1000*(batch_index+2) if batch_index < (int(np.floor(num_obs/1000))-1) else len(obs[ki-2][i][j][0])
                            # NOTE(review): unlike every other theano.shared call
                            # here, this asarray omits dtype=theano.config.floatX —
                            # likely a bug (float64 vs floatX mismatch when
                            # floatX != 'float64', e.g. on GPU).
                            Obs = theano.shared(np.asarray(obs[ki-2][i][j][0][1000*(batch_index+1):top]))
                            n_obs = len(Obs.get_value())
                            print n_obs
                            batch_choice = call_underfit_choice_theano(M, Obs, num_M, n_obs, ki, num_alpha, Agression_profiles, Pred)
                            for ag in xrange(len(Agression_profiles)):
                                choice_bin[ag] = np.asarray(choice_bin[ag].tolist() + batch_choice[ag].tolist()) #appends batches for each agression function
                    t1=time.time()
                    print choice_bin#, obs[ki-2][i][j][1]
                    print 'choices took ', t1-t0, ' s'
                    #Choices[ki-2][i].append([choice_bin, obs[ki-2][i][j][1]])
                    # reduce per-observation predictiveness to moments, one entry
                    # per aggression profile
                    agg_bin = []
                    for a in xrange(len(Agression_profiles)):
                        choice_pred = get_predictiveness_array(choices = choice_bin[a], verses = obs[ki-2][i][j][1], Preds = Pred, num_Obs = num_obs)
                        pred_moments = get_moments(choice_pred, num_obs)
                        agg_bin.append(pred_moments)
                    Statistics[ki-2][i].append(agg_bin)
                t1 = time.time()
                print 'single bin takes: ',(t1-t0)/60., ' minutes'
                #print choice_bin
            # checkpoint this row of bins
            f = open('k%d_%d_chi_statistics.pkl'%(ki, i), 'wb')
            pickle.dump(Statistics[ki-2][i], f)
            f.close()
        # checkpoint all statistics for this K
        f = open('k%d_chi_statistics.pkl'%ki, 'wb')
        pickle.dump(Statistics[ki-2],f)
        f.close()
    return Choices
def model_choice(models, obs): k = [i for i in xrange(2, 9)] num_alpha = 10 Choices = [] Statistics = [] Agression_profiles = [] Agression_profiles.append( theano.shared( np.ones((num_alpha, 1), dtype=theano.config.floatX) / num_alpha)) #uniform Agression_profiles.append( theano.shared( np.array([[i * 2. / num_alpha] for i in xrange(num_alpha)], dtype=theano.config.floatX))) #agressive Agression_profiles.append( theano.shared( np.array([[2 - i * 2. / num_alpha] for i in xrange(num_alpha)], dtype=theano.config.floatX))) #cautious for ki in k: Choices.append([]) Statistics.append([]) print 'K = ', ki #Agression = theano.shared(np.ones((num_alpha, 1), dtype = theano.config.floatX)/num_alpha) num_M = models[ki - 2].shape[0] M = theano.shared( np.asarray(models[ki - 2], dtype=theano.config.floatX)) result = predictiveness_profiles(M, ki, len(models[ki - 2])) t0 = time.time() Pred = theano.function([], result)() t1 = time.time() print 'pred took ', t1 - t0, ' s' numNbins = len(obs[ki - 2]) numHbins = len(obs[ki - 2][0]) for i in xrange(numNbins): #Choices[ki-2].append([]) Statistics[ki - 2].append([]) for j in xrange(numHbins): print 'bin ', i, j t0 = time.time() if obs[ki - 2][i][j] == [] or obs[ki - 2][i][j][0].shape[1] == 0: #there are no observtions in this N*H bin... 
#Choices[ki-2][i].append([]) Statistics[ki - 2][i].append([]) continue else: num_obs = obs[ki - 2][i][j][0].shape[0] print 'num obs ', num_obs if num_obs < 1000: Obs = theano.shared( np.asarray(obs[ki - 2][i][j][0], dtype=theano.config.floatX)) choice_bin = call_underfit_choice_theano( M, Obs, num_M, num_obs, ki, num_alpha, Agression_profiles, Pred) else: Obs = theano.shared( np.asarray(obs[ki - 2][i][j][0][0:1000], dtype=theano.config.floatX)) n_obs = len(Obs.get_value()) print n_obs choice_bin = call_underfit_choice_theano( M, Obs, num_M, n_obs, ki, num_alpha, Agression_profiles, Pred) for batch_index in xrange(int(np.floor(num_obs / 1000))): top = 1000 * (batch_index + 2) if batch_index < ( int(np.floor(num_obs / 1000)) - 1) else len( obs[ki - 2][i][j][0]) Obs = theano.shared( np.asarray(obs[ki - 2][i][j][0][1000 * (batch_index + 1):top])) n_obs = len(Obs.get_value()) print n_obs batch_choice = call_underfit_choice_theano( M, Obs, num_M, n_obs, ki, num_alpha, Agression_profiles, Pred) for ag in xrange(len(Agression_profiles)): choice_bin[ag] = np.asarray( choice_bin[ag].tolist() + batch_choice[ag].tolist() ) #appends batches for each agression function t1 = time.time() print choice_bin #, obs[ki-2][i][j][1] print 'choices took ', t1 - t0, ' s' #Choices[ki-2][i].append([choice_bin, obs[ki-2][i][j][1]]) agg_bin = [] for a in xrange(len(Agression_profiles)): choice_pred = get_predictiveness_array( choices=choice_bin[a], verses=obs[ki - 2][i][j][1], Preds=Pred, num_Obs=num_obs) pred_moments = get_moments(choice_pred, num_obs) agg_bin.append(pred_moments) Statistics[ki - 2][i].append(agg_bin) t1 = time.time() print 'single bin takes: ', (t1 - t0) / 60., ' minutes' #print choice_bin f = open('k%d_%d_chi_statistics.pkl' % (ki, i), 'wb') pickle.dump(Statistics[ki - 2][i], f) f.close() f = open('k%d_chi_statistics.pkl' % ki, 'wb') pickle.dump(Statistics[ki - 2], f) f.close() return Choices