import numpy as np

def best_model_sel(prob_threshold, survey_type="random", pro_path="/home/jemejia/CosmicVarianceLAES/"):

    ID_file = pro_path + "data/mock_survey/" + "ID_" + survey_type + "_surveys.dat"
    p_values_file = pro_path + "data/mock_survey/" + "p_values_FOF_ID_" + survey_type + "_surveys.dat"

    ks_data = np.loadtxt(p_values_file)
    

    # columns: m_min, m_max, f_occ, survey ID, KS-test probability
    m_min_arr = ks_data[:,0]
    m_max_arr = ks_data[:,1]
    f_occ_arr = ks_data[:,2]
    ID_survey_arr = ks_data[:,3]
    model_prob_arr = ks_data[:,4]
    print np.size(m_min_arr)
    # choose the models with KS-test probabilities greater than prob_threshold
    index = np.where(model_prob_arr >= prob_threshold)
    print np.size(index)
    best_models = np.empty([np.size(index), np.size(ks_data[0,:])])
    del(ks_data)

    best_models[:,0]=m_min_arr[index]
    best_models[:,1]=m_max_arr[index]
    best_models[:,2]=f_occ_arr[index]
    best_models[:,3]= ID_survey_arr[index] 
    best_models[:,4]=model_prob_arr[index]
    print "the number of selected models are", np.size(best_models[:,0])
    return best_models
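
# A minimal usage sketch; the 0.9 probability threshold is only illustrative,
# and the mock-survey .dat files are assumed to exist under pro_path:
best = best_model_sel(0.9, survey_type="random")
print "selected (m_min, m_max, f_occ) per model:"
print best[:, 0:3]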
Example #2
import numpy
import cjson

def read(filename):
    """Accepts a filename string and dispatches on its extension:
    .npy.list.npz -> list of arrays saved with numpy.savez,
    .npy          -> text matrix read with numpy.loadtxt,
    anything else -> list of JSON objects, one decoded per line.
    """
    if '.npy.list.npz' in filename:
        a = numpy.load(filename)
        return [a['arr_%s' % i] for i in xrange(len(a.keys()))]
    elif '.npy' in filename:
        # use numpy to read in; may be .gz compressed
        return numpy.loadtxt(filename)
    else:
        with open(filename, 'r') as f:
            return [cjson.decode(r) for r in f]
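
# A minimal usage sketch; 'events.log' is a hypothetical file holding one JSON
# object per line, exercising the plain-text branch above:
for record in read('events.log'):
    print record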
Example #3

    
    #var = SupervisedLDAVars(test_data, K=3)
    #var = SupervisedLDAVars(noisy_test_data, K=3)



    # use my big generated dataset
    n = 9994

    labeled_documents = topiclib.read_sparse('data/synthbigtlc/labeled.dat')[:100]
    y = np.loadtxt('data/synthbigtlc/yL.npy')[:100]
    real_data = (labeled_documents, y)

    var = PartialSupervisedLDAVars(real_data, Ks=5, Kb=20)

    try:
        output = run_partial_slda(var)
    except Exception as e:
        print e
        import pdb; pdb.post_mortem()

def best_model_correlation(best_models_array, theta_min, theta_max, theta_bins, survey_type="random", field="full", distance=6558.3, obs_surveys=12, x_width=46.0, y_width=35.0, z_depth=41.0, box_length=250, random_cat_number=16, pro_path="/home/jemejia/CosmicVariance/"):

    
    print "computing correlation functions of the selected models"
    dmh_path=pro_path+"data/dark_matter/FOF/"
    laes_path=pro_path+"data/laes/FOF/"
    n_i= int( box_length/x_width)
    n_j= int( box_length/y_width)
    n_k= int( box_length/z_depth)
    n_models = n_i * n_j * n_k
    
    ID_file = pro_path + "data/mock_survey/" + "ID_" + survey_type + "_surveys.dat"

    ID_data = np.loadtxt(ID_file, dtype='int')
    

    survey_ID=ID_data[:,0]
    field_ID=ID_data[:,1]
    i_field = ID_data[:,2]
    j_field = ID_data[:,3]
    k_field = ID_data[:,4]

    mock_surveys = survey_ID[-1]

    ID_arr = best_models_array[:,3]
    index_eq_ID = np.where(ID_arr == 1)

    cat_number = index_eq_ID[0] - 1
 
    i_fields_to_measure=[]
    j_fields_to_measure=[]
    k_fields_to_measure=[]
    m_min_to_measure=[]
    m_max_to_measure=[]
    f_occ_to_measure=[]
    #choosing the subcatalogs of the best fields.
    best_correlation=np.empty([len(ID_arr),theta_bins])
    std_correlation=np.empty([len(ID_arr),theta_bins])
    
    #for w in range( len(ID_arr) ):   # full loop over every selected model
    for w in range( 7 ):              # NOTE: only the first 7 models are processed
        index=np.where( survey_ID == int(ID_arr[w]) )
        
        S_ID=survey_ID[index]
        ID_ini=S_ID[0]
        ID_end=int(ID_ini+obs_surveys)
        m_min = best_models_array[w,0]
        m_max = best_models_array[w,1]
        f_occ = best_models_array[w,2]
        print "model:",w,"parameters:" ,m_min, m_max, f_occ
        i_s=i_field[ID_ini:ID_end]
        j_s=j_field[ID_ini:ID_end]
        k_s=k_field[ID_ini:ID_end]
        
        
        
        if field == "large":
            i_range = 7
            print "large field"
        else:
            i_range = np.size(i_s)
            print "full field"
        print "number of sub-catalogs=", i_range

        # allocate after i_range is known so the mean over axis 0 below is not
        # diluted by unfilled zero rows when field == "large"
        corr = np.zeros((i_range, theta_bins))
        corr_peebles = np.zeros((i_range, theta_bins))
        corr_standard = np.zeros((i_range, theta_bins))
        corr_laes = np.zeros(theta_bins)
        for i in range( i_range ):
            
            dmh_filename=dmh_path+"halos_bolshoi_"+str(i_s[i])+"-"+str(j_s[i])+"-"+str(k_s[i])+".csv"
            halos_prop=np.loadtxt(dmh_filename,delimiter=",",skiprows=12)
            
            halo_mass=halos_prop[:,4]
            
            x_halos=halos_prop[:,0]
            y_halos=halos_prop[:,1]
            z_halos=halos_prop[:,2]
            numbers=np.arange(len(halo_mass))
            halo_index=np.where( (halo_mass< m_max) & (halo_mass> m_min) )
            halo_mass_sel=halo_mass[halo_index]
            halo_index=numbers[halo_index]
                    
            np.random.shuffle(halo_index)
            
            
            
            
            n_halos=np.size(halo_mass_sel)
            del halo_mass_sel
            n_laes=int(f_occ*n_halos)
            
            lae_index=halo_index[0:n_laes]
            x_laes=x_halos[lae_index]
            y_laes=y_halos[lae_index]
            
            del x_halos
            del y_halos
            del z_halos
            del halo_mass
            
            #random cat histogram generation
            #P.xlabel(r'$\theta$', fontsize=16)
            #P.ylabel(r"$\xi(\theta)$",fontsize=16)
            # the RR histogram is computed once (w == 0, i == 0) and reused
            # for every model and sub-catalog
            if w == 0 and i == 0:
                print w, i
                print "computing RR (it takes a long time but is only computed once)"
                x_random = x_width*np.random.random_sample(n_laes*random_cat_number)
                y_random = y_width*np.random.random_sample(n_laes*random_cat_number)
                RR, bins = RR_histogram(x_laes, y_laes, x_random, y_random, distance, theta_min, theta_max, theta_bins, cat_number=random_cat_number)
                print RR
                
            print "subcat number ",i,"i j k=",i_s[i],j_s[i],k_s[i]

            #random-survey histogram generation
            Xmin = x_width*i_s[i]
            Xmax = Xmin + x_width
            Ymin = y_width*j_s[i]
            Ymax = Ymin + y_width

            x_random = Xmin + (Xmax - Xmin)*np.random.random_sample(n_laes)
            y_random = Ymin + (Ymax - Ymin)*np.random.random_sample(n_laes)
            
            DR,bins=DR_histogram(x_laes,y_laes,x_random,y_random,distance,theta_min,theta_max,theta_bins,cat_number=1)
            
            #survey histogram generation
            DD,bins=DD_histogram(x_laes,y_laes,distance,theta_min,theta_max,theta_bins)
            
            # Landy-Szalay estimator, xi = (DD - 2*DR + RR)/RR (assuming
            # landy_correlation implements the standard form)
            corr[i,:] = landy_correlation(DD, RR, DR)
            print "CORR_landy=",corr[i,:]
            
        corr_laes=np.mean(corr,axis=0)
        std_corr=np.std(corr,axis=0)
        print "corr_landy=",corr_laes, "std_landy=",std_corr
        
        best_correlation[w,:]=corr_laes
        std_correlation[w,:]=std_corr
        dtheta=(theta_max - theta_min)/theta_bins
        
        correlation_data=np.empty(( np.size(corr_laes) , 3 ) )
        model='{0}_{1}_{2}'.format(m_min, m_max, f_occ)
        model_name = 'model_{0}_{1}_{2}'.format(m_min, m_max, f_occ)
        filename=pro_path + "data/mock_survey/" + "correlation_best_models/" + survey_type + "_correlation_" + model_name + ".dat"
        
        angles = np.linspace( theta_min + dtheta/2.0 , theta_max - dtheta/2.0, theta_bins )
        correlation_data[:,0]=angles
        correlation_data[:,1]=best_correlation[w,:]
        correlation_data[:,2]=std_correlation[w,:]
        
        np.savetxt(filename,correlation_data)
        
        #P.errorbar(correlation_data[:,0]+2.0*w, correlation_data[:,1], correlation_data[:,2],label=model,elinewidth=2.0)
        

    file_plot=pro_path + "data/mock_survey/" + "correlation_best_models/" + survey_type + "_" + field  +"_"+ "correlation_plots" + ".png"
    #P.legend(shadow=False)
    obs_correlation_file=pro_path + "data/obs/hayashino_whole_SSA22_field.txt"
    obs_correlation=np.loadtxt(obs_correlation_file,skiprows=4)
    #P.ylim(ymax=0.6)
    #P.xlim(xmax=1040)
    
    #P.errorbar(obs_correlation[0:theta_bins,0]-3.0, obs_correlation[0:theta_bins,1], obs_correlation[0:theta_bins,2],label="Hayashino et al 2004",elinewidth=3.0,fmt="o-")
    #P.legend(shadow=False)
    #P.title(survey_type)
    #P.savefig(file_plot)
    #P.figure()
    return best_correlation,std_correlation,angles
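
# landy_correlation is defined elsewhere in this project; a minimal sketch of
# the standard Landy-Szalay estimator it presumably implements, bin by bin,
# on normalized pair-count histograms:
def landy_correlation_sketch(DD, RR, DR):
    return (DD - 2.0*DR + RR) / RR

# End-to-end sketch chaining the two functions above; the threshold and the
# angular binning are illustrative values, and the survey/halo data files plus
# the histogram helpers (RR_histogram, DD_histogram, DR_histogram) must exist:
best = best_model_sel(0.9, survey_type="random")
xi, xi_std, theta = best_model_correlation(best, 10.0, 1000.0, 15)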
Example #5
from functools import partial

run_tlc = partial(graphlib.run_variational_em,
                  e_step_func=tlc_e_step,
                  m_step_func=tlc_m_step,
                  global_elbo_func=tlc_global_elbo,
                  print_func=tlc_print_func)
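
# run_tlc(var) is now shorthand for graphlib.run_variational_em(var,
# e_step_func=tlc_e_step, ...) with all four TLC callbacks pre-bound.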


            
if __name__=='__main__':
    dirname = 'synthtlc'
    dirname = 'synthbig'   # overrides the line above; only 'synthbig' is used

    # use my tlc synthetically generated dataset
    documents = topiclib.read_sparse(dirname + '/documents.dat')
    comments = topiclib.read_sparse(dirname + '/comments.dat')
    labeled_documents = topiclib.read_sparse(dirname + '/labeled.dat')
    background = topiclib.read_sparse(dirname + '/background.dat')

    y = np.loadtxt(dirname + '/yL.npy')
    real_data = (documents, comments, labeled_documents, background, y)

    var = TLCVars(real_data, Ku=29, Ks=5, Kb=24)

    try:
        output = run_tlc(var)
    except Exception as e:
        print e
        import pdb; pdb.post_mortem()

Example #6

    
    #var = SupervisedLDAVars(test_data, K=3)
    #var = SupervisedLDAVars(noisy_test_data, K=3)



    # use my big generated dataset
    labeled_documents = topiclib.read_sparse('synthtlc/labeled.dat')
    y = np.loadtxt('synthtlc/yL.npy')
    real_data = (labeled_documents, y)

    var = SupervisedLDAVars(real_data, K=13)

    try:
        output = run_slda(var)
    except Exception as e:
        print e
        import pdb; pdb.post_mortem()
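
# run_slda is assumed to be built elsewhere with the same functools.partial
# pattern as run_tlc in Example #5, binding the sLDA e-step/m-step callbacks.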