コード例 #1
0
def model_choice(models, obs):
    k = [i for i in xrange(2, 9)]

    Statistics = []

    for ki in k:
        print 'K = ', ki

        num_M = models[ki - 2].shape[0]
        print 'Num Models: ', num_M
        numNbins = len(obs[ki - 2])
        numHbins = len(obs[ki - 2][0])

        M = theano.shared(
            np.asarray(models[ki - 2], dtype=theano.config.floatX))

        ObSym = T.matrix(
        )  # Symbolic tensor for observation batches - indexed elements of Obs shared variable are passed through this

        Pred = theano.function(
            [], predictiveness_profiles(M, ki, len(
                models[ki - 2])))()  # This should be dealt with better too...
        Pred_n = Pred

        Pred = theano.shared(np.asarray(Pred, dtype=theano.config.floatX))

        # setup inference schemas and theano symbolic tensors
        if INFERENCE == 'underfit':
            profiles = make_agression_profiles(num_profiles, num_alpha)

            #alpha = theano.shared(np.asmatrix(np.linspace(0.0,1.0, num = num_alpha, endpoint = False), dtype=theano.config.floatX))
            Alpha = T.arange(0., 1.0, 1. / num_alpha)
            Agression_profiles = T.matrix('Agr')
            nAlpha, nM, nO = T.iscalars('', '', '')

            Choice_Maker = Underfit_Choice(M, ObSym, nM, nO, ki, nAlpha, Alpha,
                                           Agression_profiles, Pred,
                                           pValue_alg)  #only works for 0...

        elif INFERENCE == 'bayes':
            profiles = make_priors_profiles(num_priors, num_M)

            Priors_profiles = T.matrix('Priors')
            Loss_funcs = T.arange(
                1, 5
            )  # Loss functions are choices in bayesian_choice numbered [1,4]
            nM, nO = T.iscalars('', '')

            Choice_Maker = Bayesian_Choice(M, ObSym, nM, nO, ki,
                                           Priors_profiles, Loss_funcs)

        else:
            print 'unknown inference algorithm...'
            quit()

        # all data for this K
        k_Data = kData(numNbins, numHbins, num_profiles)

        for i in xrange(numNbins):
            for j in xrange(numHbins):
                print 'bin ', i, j

                t0 = time.time()

                if obs[ki - 2][i][j] == [] or obs[ki -
                                                  2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    continue
                else:
                    num_obs = obs[ki - 2][i][j][0].shape[0]

                # allocate for predictiveness of model choice vs universe for each obs for each profile
                k_pred = kPred(num_obs, num_profiles)

                num_batches = int(np.ceil(num_obs / np.float(BATCH_SIZE)))

                for batch_index in xrange(num_batches):
                    top = BATCH_SIZE * (batch_index + 1) if batch_index < (
                        num_batches - 1) else num_obs
                    n_obs = top - BATCH_SIZE * (batch_index)
                    print 'batch index ', batch_index, '\t num obs: ', top - BATCH_SIZE * batch_index

                    if INFERENCE == 'underfit':
                        batch_choice = Choice_Maker.Choice_Profile_F(
                            profiles, num_alpha, num_M, n_obs,
                            obs[ki - 2][i][j][0][BATCH_SIZE * batch_index:top])
                        print batch_choice
                        for prof in xrange(num_profiles):
                            k_pred[prof][BATCH_SIZE * (
                                batch_index):top] = get_predictiveness_array(
                                    batch_choice[prof], obs[ki - 2][i][j][1],
                                    Pred_n, n_obs)

                    elif INFERENCE == 'bayes':
                        batch_choice = Choice_Maker.Choice_Profile_F(
                            profiles, num_M, n_obs,
                            obs[ki - 2][i][j][0][BATCH_SIZE * batch_index:top])
                        print batch_choice

                        for pr in xrange(num_priors):
                            for lf in xrange(num_loss_funcs):
                                k_pred[pr * num_loss_funcs +
                                       lf][BATCH_SIZE *
                                           (batch_index
                                            ):top] = get_predictiveness_array(
                                                batch_choice[pr][lf],
                                                obs[ki - 2][i][j][1], Pred_n,
                                                n_obs)

                for prof in xrange(num_profiles):
                    pred_moments = get_moments(k_pred[prof], num_obs)
                    for m in xrange(len(pred_moments)):
                        k_Data[prof][m][i, j] = pred_moments[m]

                t1 = time.time()
                print 'single bin takes: ', (t1 - t0) / 60., ' minutes'
        Statistics.append(k_Data)
        f = open('%s_k%d.pkl' % (name, ki), 'wb')
        pickle.dump(k_Data, f)
        f.close()

    return Statistics
コード例 #2
0
ファイル: model_choice.py プロジェクト: Underfit/underfit
def model_choice(models, obs):	
    k = [i for i in xrange(2, 9)]
    
    Statistics = []
    
    for ki in k:        
        print 'K = ', ki
                
        num_M = models[ki-2].shape[0]
        print 'Num Models: ', num_M
        numNbins = len(obs[ki-2])
        numHbins = len(obs[ki-2][0])
                
        M = theano.shared(np.asarray(models[ki-2], dtype = theano.config.floatX))
           
        ObSym = T.matrix() # Symbolic tensor for observation batches - indexed elements of Obs shared variable are passed through this

        Pred = theano.function([], predictiveness_profiles(M, ki, len(models[ki-2])))() # This should be dealt with better too...
        Pred_n = Pred

        Pred = theano.shared(np.asarray(Pred, dtype = theano.config.floatX))
        
        
        # setup inference schemas and theano symbolic tensors
        if INFERENCE == 'underfit':
            profiles = make_agression_profiles(num_profiles, num_alpha)
            
            #alpha = theano.shared(np.asmatrix(np.linspace(0.0,1.0, num = num_alpha, endpoint = False), dtype=theano.config.floatX))
            Alpha = T.arange(0., 1.0, 1./num_alpha)
            Agression_profiles = T.matrix('Agr')
            nAlpha, nM, nO = T.iscalars('','','')

            Choice_Maker = Underfit_Choice(M, ObSym, nM, nO, ki, nAlpha, Alpha, Agression_profiles, Pred, pValue_alg) #only works for 0...
            
        elif INFERENCE == 'bayes':
            profiles = make_priors_profiles(num_priors, num_M)
            
            Priors_profiles = T.matrix('Priors')
            Loss_funcs = T.arange(1,5)  # Loss functions are choices in bayesian_choice numbered [1,4]
            nM, nO = T.iscalars('','')

            Choice_Maker = Bayesian_Choice(M, ObSym, nM, nO, ki, Priors_profiles, Loss_funcs)
            
        else:
            print 'unknown inference algorithm...'
            quit()
        

        # all data for this K
        k_Data = kData(numNbins, numHbins, num_profiles)
        

        for i in xrange(numNbins):
            for j in xrange(numHbins):
                print 'bin ', i, j
                
                t0 = time.time()
                
                if obs[ki-2][i][j] == [] or obs[ki-2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    continue
                else:
                    num_obs = obs[ki-2][i][j][0].shape[0]

                # allocate for predictiveness of model choice vs universe for each obs for each profile
                k_pred = kPred(num_obs, num_profiles)

                num_batches = int(np.ceil(num_obs/np.float(BATCH_SIZE)))


                for batch_index in xrange(num_batches):
                    top = BATCH_SIZE*(batch_index+1) if batch_index < (num_batches-1) else num_obs
                    n_obs = top - BATCH_SIZE*(batch_index)
                    print 'batch index ', batch_index, '\t num obs: ', top - BATCH_SIZE*batch_index
                    
                    if INFERENCE == 'underfit':                        
                        batch_choice = Choice_Maker.Choice_Profile_F(profiles, num_alpha, num_M, n_obs, obs[ki-2][i][j][0][BATCH_SIZE*batch_index:top])
                        print batch_choice
                        for prof in xrange(num_profiles):
                            k_pred[prof][BATCH_SIZE*(batch_index):top] = get_predictiveness_array(batch_choice[prof],  obs[ki-2][i][j][1], Pred_n, n_obs)

                    elif INFERENCE == 'bayes':   
                        batch_choice = Choice_Maker.Choice_Profile_F(profiles, num_M, n_obs, obs[ki-2][i][j][0][BATCH_SIZE*batch_index:top])
                        print batch_choice
                     
                        for pr in xrange(num_priors):
                            for lf in xrange(num_loss_funcs):
                                k_pred[pr*num_loss_funcs + lf][BATCH_SIZE*(batch_index):top] = get_predictiveness_array(batch_choice[pr][lf],  obs[ki-2][i][j][1], Pred_n, n_obs)
                        
                for prof in xrange(num_profiles):
                    pred_moments = get_moments(k_pred[prof], num_obs)
                    for m in xrange(len(pred_moments)):
                        k_Data[prof][m][i,j] = pred_moments[m]

                t1 = time.time()
                print 'single bin takes: ',(t1-t0)/60., ' minutes' 
        Statistics.append(k_Data)
        f = open('%s_k%d.pkl'%(name, ki), 'wb')
        pickle.dump(k_Data, f)
        f.close()
    
    return Statistics
コード例 #3
0
ファイル: model_choiceOLD.py プロジェクト: Underfit/underfit
def model_choice(models, obs):	
    k = [i for i in xrange(2, 9)]
    num_alpha = 10
    
    Choices = []
    Statistics = []

    Agression_profiles = []
    Agression_profiles.append(theano.shared(np.ones((num_alpha, 1), dtype = theano.config.floatX)/num_alpha)) #uniform
    Agression_profiles.append(theano.shared(np.array([[i*2./num_alpha] for i in xrange(num_alpha)], dtype = theano.config.floatX))) #agressive
    Agression_profiles.append(theano.shared(np.array([[2-i*2./num_alpha] for i in xrange(num_alpha)], dtype = theano.config.floatX))) #cautious
    
    for ki in k:
        Choices.append([])
        Statistics.append([])
        
        print 'K = ', ki
        
        #Agression = theano.shared(np.ones((num_alpha, 1), dtype = theano.config.floatX)/num_alpha)
        
        num_M = models[ki-2].shape[0]
        
        M = theano.shared(np.asarray(models[ki-2], dtype = theano.config.floatX))
        
        result = predictiveness_profiles(M, ki, len(models[ki-2]))
        
        t0=time.time()
        Pred = theano.function([], result)()
        t1=time.time()
        print 'pred took ', t1-t0, ' s'
        numNbins = len(obs[ki-2])
        numHbins = len(obs[ki-2][0])


        for i in xrange(numNbins):
            #Choices[ki-2].append([])
            Statistics[ki-2].append([])

            for j in xrange(numHbins):
                print 'bin ', i, j
                
                t0 = time.time()
                
                if obs[ki-2][i][j] == [] or obs[ki-2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    #Choices[ki-2][i].append([])
                    Statistics[ki-2][i].append([])
                    continue
                else:
                    num_obs = obs[ki-2][i][j][0].shape[0]
                print 'num obs ', num_obs

                if num_obs < 1000:
                    Obs = theano.shared(np.asarray(obs[ki-2][i][j][0], dtype = theano.config.floatX))
                    choice_bin = call_underfit_choice_theano(M, Obs, num_M, num_obs, ki, num_alpha, Agression_profiles, Pred)
                else:
                    Obs = theano.shared(np.asarray(obs[ki-2][i][j][0][0:1000], dtype = theano.config.floatX))
                    n_obs = len(Obs.get_value())
                    print n_obs
                    choice_bin = call_underfit_choice_theano(M, Obs, num_M, n_obs, ki, num_alpha, Agression_profiles, Pred) 
                    for batch_index in xrange(int(np.floor(num_obs/1000))):
                        top = 1000*(batch_index+2) if batch_index < (int(np.floor(num_obs/1000))-1) else len(obs[ki-2][i][j][0])
                        Obs = theano.shared(np.asarray(obs[ki-2][i][j][0][1000*(batch_index+1):top]))
                        n_obs = len(Obs.get_value())
                        print n_obs
                        batch_choice = call_underfit_choice_theano(M, Obs, num_M, n_obs, ki, num_alpha, Agression_profiles, Pred)
                        for ag in xrange(len(Agression_profiles)):
                            choice_bin[ag] = np.asarray(choice_bin[ag].tolist() + batch_choice[ag].tolist()) #appends batches for each agression function
                t1=time.time()
            
                print choice_bin#, obs[ki-2][i][j][1]
                
                print 'choices took ', t1-t0, ' s'
                
                #Choices[ki-2][i].append([choice_bin, obs[ki-2][i][j][1]])

                agg_bin = []
                for a in xrange(len(Agression_profiles)):
                    choice_pred = get_predictiveness_array(choices = choice_bin[a], verses = obs[ki-2][i][j][1], Preds = Pred, num_Obs = num_obs)
                    pred_moments = get_moments(choice_pred, num_obs)
                    agg_bin.append(pred_moments)
                
                Statistics[ki-2][i].append(agg_bin)

                t1 = time.time()


                print 'single bin takes: ',(t1-t0)/60., ' minutes' 
    
                #print choice_bin
            f = open('k%d_%d_chi_statistics.pkl'%(ki, i), 'wb')
            pickle.dump(Statistics[ki-2][i], f)
            f.close()
        f = open('k%d_chi_statistics.pkl'%ki, 'wb')
        pickle.dump(Statistics[ki-2],f)
        f.close()
    return Choices
コード例 #4
0
ファイル: model_choiceOLD.py プロジェクト: ebuchman/underfit
def model_choice(models, obs):
    k = [i for i in xrange(2, 9)]
    num_alpha = 10

    Choices = []
    Statistics = []

    Agression_profiles = []
    Agression_profiles.append(
        theano.shared(
            np.ones((num_alpha, 1), dtype=theano.config.floatX) /
            num_alpha))  #uniform
    Agression_profiles.append(
        theano.shared(
            np.array([[i * 2. / num_alpha] for i in xrange(num_alpha)],
                     dtype=theano.config.floatX)))  #agressive
    Agression_profiles.append(
        theano.shared(
            np.array([[2 - i * 2. / num_alpha] for i in xrange(num_alpha)],
                     dtype=theano.config.floatX)))  #cautious

    for ki in k:
        Choices.append([])
        Statistics.append([])

        print 'K = ', ki

        #Agression = theano.shared(np.ones((num_alpha, 1), dtype = theano.config.floatX)/num_alpha)

        num_M = models[ki - 2].shape[0]

        M = theano.shared(
            np.asarray(models[ki - 2], dtype=theano.config.floatX))

        result = predictiveness_profiles(M, ki, len(models[ki - 2]))

        t0 = time.time()
        Pred = theano.function([], result)()
        t1 = time.time()
        print 'pred took ', t1 - t0, ' s'
        numNbins = len(obs[ki - 2])
        numHbins = len(obs[ki - 2][0])

        for i in xrange(numNbins):
            #Choices[ki-2].append([])
            Statistics[ki - 2].append([])

            for j in xrange(numHbins):
                print 'bin ', i, j

                t0 = time.time()

                if obs[ki - 2][i][j] == [] or obs[ki -
                                                  2][i][j][0].shape[1] == 0:
                    #there are no observtions in this N*H bin...
                    #Choices[ki-2][i].append([])
                    Statistics[ki - 2][i].append([])
                    continue
                else:
                    num_obs = obs[ki - 2][i][j][0].shape[0]
                print 'num obs ', num_obs

                if num_obs < 1000:
                    Obs = theano.shared(
                        np.asarray(obs[ki - 2][i][j][0],
                                   dtype=theano.config.floatX))
                    choice_bin = call_underfit_choice_theano(
                        M, Obs, num_M, num_obs, ki, num_alpha,
                        Agression_profiles, Pred)
                else:
                    Obs = theano.shared(
                        np.asarray(obs[ki - 2][i][j][0][0:1000],
                                   dtype=theano.config.floatX))
                    n_obs = len(Obs.get_value())
                    print n_obs
                    choice_bin = call_underfit_choice_theano(
                        M, Obs, num_M, n_obs, ki, num_alpha,
                        Agression_profiles, Pred)
                    for batch_index in xrange(int(np.floor(num_obs / 1000))):
                        top = 1000 * (batch_index + 2) if batch_index < (
                            int(np.floor(num_obs / 1000)) - 1) else len(
                                obs[ki - 2][i][j][0])
                        Obs = theano.shared(
                            np.asarray(obs[ki -
                                           2][i][j][0][1000 *
                                                       (batch_index + 1):top]))
                        n_obs = len(Obs.get_value())
                        print n_obs
                        batch_choice = call_underfit_choice_theano(
                            M, Obs, num_M, n_obs, ki, num_alpha,
                            Agression_profiles, Pred)
                        for ag in xrange(len(Agression_profiles)):
                            choice_bin[ag] = np.asarray(
                                choice_bin[ag].tolist() +
                                batch_choice[ag].tolist()
                            )  #appends batches for each agression function
                t1 = time.time()

                print choice_bin  #, obs[ki-2][i][j][1]

                print 'choices took ', t1 - t0, ' s'

                #Choices[ki-2][i].append([choice_bin, obs[ki-2][i][j][1]])

                agg_bin = []
                for a in xrange(len(Agression_profiles)):
                    choice_pred = get_predictiveness_array(
                        choices=choice_bin[a],
                        verses=obs[ki - 2][i][j][1],
                        Preds=Pred,
                        num_Obs=num_obs)
                    pred_moments = get_moments(choice_pred, num_obs)
                    agg_bin.append(pred_moments)

                Statistics[ki - 2][i].append(agg_bin)

                t1 = time.time()

                print 'single bin takes: ', (t1 - t0) / 60., ' minutes'

                #print choice_bin
            f = open('k%d_%d_chi_statistics.pkl' % (ki, i), 'wb')
            pickle.dump(Statistics[ki - 2][i], f)
            f.close()
        f = open('k%d_chi_statistics.pkl' % ki, 'wb')
        pickle.dump(Statistics[ki - 2], f)
        f.close()
    return Choices