def main(): """ Make a random catalogue for each tile in a field in the CFHTLens survey. example: python make_cfhtlens_random_cat.py W3 options: second user input specifies the mask file to use. A "?" argument returns the list of mask files. """ global filepath global savepath filepath = cu.get_data_path() + 'CFHTLens/masks/' savepath = cu.get_output_path() + 'processed_data/CFHTLens/random_catalogues/' field = sys.argv[1] print 'field:', field if len(sys.argv)>2: filenames = sys.argv[2] else: filenames = os.listdir(cu.get_data_path()+'CFHTLens/masks/') filenames = fnmatch.filter(filenames, field+'*.fits') # a "?" as the 2nd user input returns the lost of mask files and exits. if filenames == '?': filenames = os.listdir(cu.get_data_path()+'CFHTLens/masks/') filenames = fnmatch.filter(filenames, field+'*.fits') for filename in filenames: print(filename) return 0 p = Pool(8) p.map(do_work,filenames)
def main(): filepath = cu.get_data_path() + 'Chinchilla/halo_catalogues/' savepath = cu.get_output_path( ) + 'processed_data/Chinchilla/halo_catalogues/' catalogue = 'hlist_1.00000.list' name = ['scale', 'id', 'desc_scale', 'desc_id', 'num_prog', 'pid', 'upid',\ 'desc_pid', 'phantom', 'sam_mvir', 'mvir', 'rvir', 'rs', 'vrms',\ 'mmp?', 'scale_of_last_MM', 'vmax', 'x', 'y', 'z', 'vx', 'vy', 'vz',\ 'Jx', 'Jy', 'Jz', 'Spin', 'Breadth_first_ID', 'Depth_first_ID',\ 'Tree_root_ID', 'Orig_halo_ID', 'Snap_num', 'Next_coprogenitor_depthfirst_ID',\ 'Last_progenitor_depthfirst_ID', 'Rs_Klypin', 'Mvir_all', 'M200b', 'M200c',\ 'M500c', 'M2500c', 'Xoff', 'Voff', 'Spin_Bullock', 'b_to_a', 'c_to_a',\ 'A[x]', 'A[y]', 'A[z]', 'T/|U|', 'Macc', 'Mpeak', 'Vacc', 'Vpeak',\ 'Halfmass_Scale', 'Acc_Rate_Inst', 'Acc_Rate_100Myr', 'Acc_Rate_1*Tdyn',\ 'Acc_Rate_2*Tdyn', 'Acc_Rate_Mpeak', 'Mpeak_Scale', 'Acc_Scale','M4%_Scale'] print 'reading in:', catalogue filename = catalogue data = ascii.read(filepath+filename, delimiter='\s', names=name, \ guess=False, Reader=ascii.Basic, data_start = 0) f = h5py.File(savepath + catalogue + '.hdf5', 'w') dset = f.create_dataset(catalogue, data=data) f.close() filename = catalogue data_table = table.table.Table(data=data) ascii.write(data_table, savepath + filename + '.dat')
def main(): filepath = cu.get_data_path() + 'CFHTLens/masks/' savepath = cu.get_output_path() + 'processed_data/CFHTLens/masks/' field = 'W1' filenames = os.listdir('/scratch/dac29/data/CFHTLens/masks/') filenames = fnmatch.filter(filenames, field + '*.fits') #for filename in filenames: filename = filenames[0] hdulist = fits.open(filepath + filename, memmap=True) header = hdulist[0].header print header nxpix = header['NAXIS1'] nypix = header['NAXIS2'] tile = header['OBJECT'] w = wcs.WCS(hdulist[0].header) corners = np.array( [[0, 0], [0, nypix - 1], [nxpix - 1, nypix - 1], [nxpix - 1, 0]], np.float_) world = w.wcs_pix2world(corners, 1) print world data = hdulist[0].data hdulist.close()
def main(): filepath = cu.get_data_path() + 'Chinchilla/halo_catalogues/' catalogue = 'hlist_1.00000.list' column_info = ([(1, 'id', 'int'), (5, 'pid', 'int'), (6, 'upid', 'int'), (10, 'mvir', 'float'), (11, 'rvir', 'float'), (12, 'rs', 'float'), (15, 'scale_of_last_MM', 'float'), (16, 'vmax', 'float'), (17, 'x', 'float'), (18, 'y', 'float'), (19, 'z', 'float'), (20, 'vx', 'float'), (21, 'vy', 'float'), (22, 'vz', 'float'), (56, 'macc', 'float'), (57, 'mpeak', 'float'), (58, 'vacc', 'float'), (59, 'vpeak', 'float'), (60, 'halfmpeak_scale', 'float'), (63, 'mar_tdyn', 'float'), (66, 'mpeak_scale', 'float'), (67, 'acc_scale', 'float'), (68, 'm04_scale', 'float')]) mp_bolshoi = 1.35e8 mp_chinchilla = mp_bolshoi / 8. mpeak_cut = mp_chinchilla * 100. cuts = [(57, mpeak_cut, None)] halos = reader_tools.read_halocat(filepath + catalogue, column_info, input_halo_cuts=cuts) print halos
def main():
    """
    Read in a cfhtlens catalogue and check object mask values against the
    fits mask.
    """
    import h5py
    if len(sys.argv) > 1:
        field = sys.argv[1]
    else:
        field = 'W3'
    if len(sys.argv) > 2:
        # NOTE(review): sys.argv entries are strings; cast so range() works.
        N_test = int(sys.argv[2])
    else:
        N_test = 100

    # import cfhtlens catalogues
    filepath = cu.get_output_path() + 'processed_data/CFHTLens/'
    f = h5py.File(filepath + 'catalogues/' + field + '.hdf5', 'r')
    W = f.get(field)

    for i in range(0, N_test):
        # pick a random object from the catalogue
        ind = np.random.randint(0, len(W), 1)[0]
        tile = W['field'][ind]  # tile the object falls in
        object_mask_value = W['MASK'][ind]
        mask_filename = tile + '_izrgu_finalmask_mosaic.fits'
        mask_path = cu.get_data_path() + 'CFHTLens/masks/'
        data, w = read_mask(mask_path, mask_filename)
        # NOTE(review): the original passed the full ra/dec columns here;
        # index the single object being tested.
        x, y = get_pixel((W['ALPHA_J2000'][ind], W['DELTA_J2000'][ind]), w)
        fits_mask_value = data[y, x]  # row,column ---> y,x
        print(i, object_mask_value, fits_mask_value)
        assert object_mask_value == fits_mask_value, \
            "fits mask does not match catalogue mask!"
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/3rd_run/' savepath = cu.get_output_path() + 'processed_data/berlind_groupcat/mock_runs/3rd_run/' ################################################################# catalogues=['Mr19_age_distribution_matching_mock_cen_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_satsys_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_cen_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satsys_shuffle_radec_mock.dat'] #names = ['IDgroup','richness','ra_center','dec_center','z_center','sigma_v','rms_size','foo1','foo1b','IDgal',\ # 'ra','dec','z','foo2','foo3','foo4','foo5','R_proj'] names = ['IDgroup','IDgal','ra','dec','z'] N = len(catalogues) for i in range(0,N): filename = catalogues[i] data = ascii.read(filepath+filename,names=names) print data print savepath+filename[:-4]+'.hdf5' f = h5py.File(savepath+filename[:-4]+'.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close()
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/2nd_run/' savepath = cu.get_output_path( ) + 'processed_data/berlind_groupcat/mock_runs/2nd_run/' ################################################################# catalogues=['Mr19_age_distribution_matching_mock_cen_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_satsys_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_cen_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satsys_shuffle_radec_mock_groups.dat'] names = ['IDgroup', 'IDgal', 'ra', 'dec', 'cz'] for i in range(0, len(catalogues)): filename = catalogues[i] data = ascii.read(filepath + filename, names=names) print data print savepath + filename[:-4] + '.hdf5' f = h5py.File(savepath + filename[:-4] + '.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close()
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/' savepath = cu.get_output_path() + 'processed_data/berlind_groupcat/' ################################################################# catalogues=['mr19_groups.fits', 'smthresh10.2.groups.dat', 'smthresh9.8.groups.dat'] filename = catalogues[0] hdulist = fits.open(filepath+filename, memmap=True) data = hdulist[1].data print 'saving as:', savepath+filename[:-5]+'.hdf5' f = h5py.File(savepath+filename[:-5]+'.hdf5', 'w') dset = f.create_dataset(filename[:-5], data=data) f.close() gc.collect() names = ['ra','dec','z','groupID','rank','Mstar','Mr','SSFR','Mgroup'] filename = catalogues[1] data = ascii.read(filepath+filename, guess=True, Reader=ascii.Basic, names=names,data_start=0) print 'saving as:', savepath+filename[:-4]+'.hdf5' f = h5py.File(savepath+filename[:-4]+'.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close() gc.collect() filename = catalogues[2] data = ascii.read(filepath+filename, guess=True, Reader=ascii.Basic, names=names,data_start=0) print 'saving as:', savepath+filename[:-4]+'.hdf5' f = h5py.File(savepath+filename[:-4]+'.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close() gc.collect()
def mask_filenames(field):
    """
    Return the list of filenames of cfhtlens fits masks for a specified field.
    """
    import os
    import fnmatch
    mask_dir = cu.get_data_path() + 'CFHTLens/masks/'
    all_files = os.listdir(mask_dir)
    return fnmatch.filter(all_files, field + '*.fits')
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/' savepath = cu.get_output_path() + 'processed_data/berlind_groupcat/' ################################################################# catalogues = [ 'mr19_groups.fits', 'smthresh10.2.groups.dat', 'smthresh9.8.groups.dat' ] filename = catalogues[0] hdulist = fits.open(filepath + filename, memmap=True) data = hdulist[1].data print 'saving as:', savepath + filename[:-5] + '.hdf5' f = h5py.File(savepath + filename[:-5] + '.hdf5', 'w') dset = f.create_dataset(filename[:-5], data=data) f.close() gc.collect() names = [ 'ra', 'dec', 'z', 'groupID', 'rank', 'Mstar', 'Mr', 'SSFR', 'Mgroup' ] filename = catalogues[1] data = ascii.read(filepath + filename, guess=True, Reader=ascii.Basic, names=names, data_start=0) print 'saving as:', savepath + filename[:-4] + '.hdf5' f = h5py.File(savepath + filename[:-4] + '.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close() gc.collect() filename = catalogues[2] data = ascii.read(filepath + filename, guess=True, Reader=ascii.Basic, names=names, data_start=0) print 'saving as:', savepath + filename[:-4] + '.hdf5' f = h5py.File(savepath + filename[:-4] + '.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close() gc.collect()
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/Berlind_run/' savepath = cu.get_output_path() + 'processed_data/berlind_groupcat/mock_runs/Berlind_run/' ################################################################# catalogue = 'Age_matching_group_tags.dat' names = ['IDgal','IDgroup'] filename = catalogue data = ascii.read(filepath+filename,names=names) print data print savepath+filename[:-4]+'.hdf5' f = h5py.File(savepath+filename[:-4]+'.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close()
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/Berlind_run/' savepath = cu.get_output_path( ) + 'processed_data/berlind_groupcat/mock_runs/Berlind_run/' ################################################################# catalogue = 'Age_matching_group_tags.dat' names = ['IDgal', 'IDgroup'] filename = catalogue data = ascii.read(filepath + filename, names=names) print data print savepath + filename[:-4] + '.hdf5' f = h5py.File(savepath + filename[:-4] + '.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close()
def main(): filepath = cu.get_data_path() + 'tinker_groupcat/' savepath = cu.get_output_path() + 'processed_data/tinker_groupcat/' ################################################################# names=['ID','ra','dec','z','M_star','Mag_r','Mag_g','SSFR','p_sat','central_ind','M_halo','R_vir','d_proj'] catalogues=['group_dr7_m.star9.7','group_dr7_m.star10.1','group_dr7_m.star10.6','group_dr7_mag.r19'] for catalogue in catalogues: print catalogue filename = catalogue+'.txt' print filepath+filename data = ascii.read(filepath+filename, delimiter='\s', names=names, \ guess=False, Reader=ascii.Basic) f = h5py.File(savepath+catalogue+'.hdf5', 'w') dset = f.create_dataset(catalogue, data=data) f.close() gc.collect()
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/4th_run/' savepath = cu.get_output_path( ) + 'processed_data/berlind_groupcat/mock_runs/4th_run/' ################################################################# catalogues=['Mr19_age_distribution_matching_mock_cen_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_satsys_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_cen_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satsys_shuffle_radec_mock.dat'] names = ['IDgroup', 'IDgal', 'ra', 'dec', 'cz'] for i in range(0, len(catalogues)): filename = catalogues[i] catalogue = filename[:-4] data_1 = ascii.read(filepath + filename, names=names, format='no_header', comment='#') #create output file with galaxy ID and group ID dtype = [('gal_ID', '>i8'), ('group_ID', '>i8')] dtype = np.dtype(dtype) data = np.recarray((len(data_1), ), dtype=dtype) #fill in data structure. data['gal_ID'] = data_1['IDgal'] data['group_ID'] = data_1['IDgroup'] #save print 'saving:', savepath + catalogue + '.hdf5' f = h5py.File(savepath + catalogue + '.hdf5', 'w') dset = f.create_dataset(catalogue, data=data) f.close() print 'done.'
def main(): filepath = cu.get_data_path() + 'CFHTLens/masks/' savepath = cu.get_output_path() + 'processed_data/CFHTLens/masks/' field='W1' filenames = os.listdir('/scratch/dac29/data/CFHTLens/masks/') filenames = fnmatch.filter(filenames, field+'*.fits') #for filename in filenames: filename = filenames[0] hdulist = fits.open(filepath+filename, memmap=True) header = hdulist[0].header print header nxpix = header['NAXIS1'] nypix = header['NAXIS2'] tile = header['OBJECT'] w = wcs.WCS(hdulist[0].header) corners = np.array([[0,0],[0,nypix-1],[nxpix-1,nypix-1],[nxpix-1,0]], np.float_) world = w.wcs_pix2world(corners, 1) print world data = hdulist[0].data hdulist.close()
def main(): filepath = cu.get_data_path() + 'Multidark/Bolshoi/particle_catalogues/' savepath = cu.get_output_path( ) + 'processed_data/Multidark/Bolshoi/particle_catalogues/' catalogue = 'bolshoi_a1.0003_2e5_particles' print 'reading in:', catalogue filename = catalogue hdulist = fits.open(filepath + catalogue + '.fits') header = hdulist[1] print header.data x = np.array(header.data['POS'][:, 0]) y = np.array(header.data['POS'][:, 1]) z = np.array(header.data['POS'][:, 2]) data = np.recarray((len(x), ), formats=['f4', 'f4', 'f4'], names=['x', 'y', 'z']) data['x'] = x data['y'] = y data['z'] = z filename = catalogue f = h5py.File(savepath + filename + '.hdf5', 'w') dset = f.create_dataset(catalogue, data=data) f.close() filename = catalogue data_table = table.table.Table(data=data) ascii.write(data_table, savepath + filename + '.dat') print data_table
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/2nd_run/' savepath = cu.get_output_path() + 'processed_data/berlind_groupcat/mock_runs/2nd_run/' ################################################################# catalogues=['Mr19_age_distribution_matching_mock_cen_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_satsys_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_cen_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle_radec_mock_groups.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satsys_shuffle_radec_mock_groups.dat'] names = ['IDgroup','IDgal','ra','dec','cz'] for i in range(0,len(catalogues)): filename = catalogues[i] data = ascii.read(filepath+filename,names=names) print data print savepath+filename[:-4]+'.hdf5' f = h5py.File(savepath+filename[:-4]+'.hdf5', 'w') dset = f.create_dataset(filename[:-4], data=data) f.close()
def main(): filepath = cu.get_data_path() + 'Berlind_groupcat/mock_runs/4th_run/' savepath = cu.get_output_path() + 'processed_data/berlind_groupcat/mock_runs/4th_run/' ################################################################# catalogues=['Mr19_age_distribution_matching_mock_cen_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_satsys_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_cen_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle_radec_mock.dat',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satsys_shuffle_radec_mock.dat'] names = ['IDgroup','IDgal','ra','dec','cz'] for i in range(0,len(catalogues)): filename = catalogues[i] catalogue = filename[:-4] data_1 = ascii.read(filepath+filename,names=names,format='no_header',comment='#') #create output file with galaxy ID and group ID dtype=[('gal_ID','>i8'),('group_ID','>i8')] dtype=np.dtype(dtype) data = np.recarray((len(data_1),),dtype=dtype) #fill in data structure. data['gal_ID'] = data_1['IDgal'] data['group_ID'] = data_1['IDgroup'] #save print 'saving:', savepath+catalogue+'.hdf5' f = h5py.File(savepath+catalogue+'.hdf5', 'w') dset = f.create_dataset(catalogue, data=data) f.close() print 'done.'
def main():
    """
    Convert the CFHTLens query results (tab-separated .tsv files) into hdf5
    catalogues, one per survey field.
    """
    filepath = cu.get_data_path() + 'CFHTLens/queries/'
    savepath = cu.get_output_path() + 'processed_data/CFHTLens/catalogues/'

    # list the available query files
    filenames = glob.glob('/scratch/dac29/data/CFHTLens/queries/*.tsv')
    for filename in filenames:
        print filename

    # small test catalogue
    field = 'test'
    print 'reading in:', field
    filename = 'test.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    gc.collect()
    print 'done.'

    # NOTE(review): the block below is disabled (triple-quoted). It read the
    # W1 field in two parts and then merged the two hdf5 files into one,
    # presumably because the full W1 query was too large to read at once —
    # confirm before re-enabling.
    '''
    #let's split this field into two parts...
    field='W1'
    print 'reading in:',field, 'part 1'
    filename = 'CFHTLens_2013-09-03T20:10:04.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
        guess=True, Reader=ascii.Basic, data_end=5114944)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'_1.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    field='W1'
    print 'reading in:',field, 'part 2'
    filename = 'CFHTLens_2013-09-03T20:10:04.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
        guess=True, Reader=ascii.Basic, data_start=5114945)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'_2.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    #combine the wo parts into one big set!!!
    field = 'W1'
    f1 = h5py.File(savepath+field+'_1.hdf5', 'r')
    dset1 = f1.get(field)
    f2 = h5py.File(savepath+field+'_2.hdf5', 'r')
    dset2 = f2.get(field)
    f = h5py.File(savepath+field+'.hdf5', 'w')
    print dset1.dtype
    length = len(dset1)+len(dset2)
    dset = f.create_dataset(field, (length,), dtype=dset1.dtype)
    print 'len(dset1)=',len(dset1),'len(dset2)=', len(dset2),\
        'len(dset1)+len(dset2)=', len(dset1)+len(dset2), 'len(dset)=', len(dset)
    print len(dset[0:len(dset1)]), len(dset[len(dset1):len(dset1)+len(dset2)+1])
    print len(dset[0:len(dset1)]), len(dset[len(dset1):len(dset1)+len(dset2)])
    print 0,len(dset1),len(dset1)+1, len(dset1)+len(dset2)+1
    dset[0:len(dset1)] = dset1
    dset[len(dset1):len(dset1)+len(dset2)+1] = dset2
    f1.close()
    f2.close()
    f.close()
    dset=0
    dset1=0
    dset2=0
    gc.collect()
    '''

    # W2 field
    field='W2'
    print 'reading in:',field
    filename = 'CFHTLens_2013-09-03T20:15:29.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
        guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'

    # NOTE(review): W3 conversion is disabled (triple-quoted) — presumably
    # already processed; confirm before re-enabling.
    '''
    field='W3'
    print 'reading in:',field
    filename = 'CFHTLens_2013-09-03T20:16:44.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
        guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    '''

    # W4 field
    field='W4'
    print 'reading in:',field
    filename = 'CFHTLens_2013-09-03T20:18:01.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
        guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
def main():
    """
    Combine the NYU VAGC fits catalogues into a single value-added hdf5
    galaxy table.

    Steps:
      1. convert every *.fits catalogue into its own hdf5 file,
      2. re-open the individual hdf5 catalogues,
      3. build a combined record array holding positions, redshift columns,
         velocity dispersions, a sersic index, and the KCORRECT/ABSMAG
         columns of each k-correction variant (none/nearest x model/petro,
         at band-shifts z0.10 and z0.00),
      4. save the result as 'nyu_vagc_dr7.hdf5'.
    """
    filepath = cu.get_data_path() + 'NYU_VAGC/'
    savepath = cu.get_output_path() + 'processed_data/NYU_VAGC/'

    filenames = os.listdir('/scratch/dac29/data/NYU_VAGC/')
    filenames = fnmatch.filter(filenames, '*.fits')

    #turn each catalogue into an hdf5 file
    for filename in filenames:
        print 'reading in:', filename
        hdulist = fits.open(filepath+filename)
        data = hdulist[1].data
        print 'saving as:', filename[:-5]+'.hdf5'
        f = h5py.File(savepath+filename[:-5]+'.hdf5', 'w')
        dset = f.create_dataset(filename[:-5], data=data)
        f.close()
        gc.collect()
        print 'done.'

    #adding in the k-correct to z=0 files on 01/14/2015
    catalogues = ['kcorrect.none.model.z0.10','kcorrect.nearest.petro.z0.10',\
                  'kcorrect.nearest.model.z0.10','kcorrect.none.petro.z0.10',\
                  'object_catalog','object_sdss_imaging','object_sdss_spectro',\
                  'sersic_catalog', 'collisions.none', 'collisions.nearest',\
                  'kcorrect.none.model.z0.00','kcorrect.nearest.petro.z0.00',\
                  'kcorrect.nearest.model.z0.00','kcorrect.none.petro.z0.00']

    # re-open each individual hdf5 catalogue produced above
    catalogue='object_catalog'
    f1 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset1 = f1.get(catalogue)
    catalogue='object_sdss_imaging'
    f2 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset2 = f2.get(catalogue)
    catalogue='object_sdss_spectro'
    f3 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset3 = f3.get(catalogue)
    catalogue='kcorrect.none.model.z0.10'
    f4 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset4 = f4.get(catalogue)
    catalogue='kcorrect.none.petro.z0.10'
    f5 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset5 = f5.get(catalogue)
    catalogue='kcorrect.nearest.model.z0.10'
    f6 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset6 = f6.get(catalogue)
    catalogue='kcorrect.nearest.petro.z0.10'
    f7 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset7 = f7.get(catalogue)
    catalogue='sersic_catalog'
    f8 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset8 = f8.get(catalogue)
    catalogue='collisions.none'
    f9 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset9 = f9.get(catalogue)
    catalogue='collisions.nearest'
    f10 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset10 = f10.get(catalogue)
    #adding in the k-correct to z=0 files on 01/14/2015
    catalogue='kcorrect.none.model.z0.00'
    f11 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset11 = f11.get(catalogue)
    catalogue='kcorrect.none.petro.z0.00'
    f12 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset12 = f12.get(catalogue)
    catalogue='kcorrect.nearest.model.z0.00'
    f13 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset13 = f13.get(catalogue)
    catalogue='kcorrect.nearest.petro.z0.00'
    f14 = h5py.File(savepath+catalogue+'.hdf5', 'r')
    dset14 = f14.get(catalogue)

    # numpy dtype descriptions of each catalogue
    dset1_descr = dset1.dtype.descr
    dset2_descr = dset2.dtype.descr
    dset3_descr = dset3.dtype.descr
    dset4_descr = dset4.dtype.descr
    dset5_descr = dset5.dtype.descr
    dset6_descr = dset6.dtype.descr
    dset7_descr = dset7.dtype.descr
    dset8_descr = dset8.dtype.descr
    dset9_descr = dset9.dtype.descr
    #adding in the k-correct to z=0 files on 01/14/2015
    dset10_descr = dset10.dtype.descr
    dset11_descr = dset11.dtype.descr
    dset12_descr = dset12.dtype.descr
    dset13_descr = dset13.dtype.descr
    dset14_descr = dset14.dtype.descr

    # Build per-band output column descriptors. descr[7] and descr[5] index
    # into the k-correct catalogue dtypes (presumably KCORRECT and ABSMAG —
    # confirm against the file schema); the name suffix records band and
    # k-correction variant.
    #KCORRECT and ABS mags
    d1 = (dset4.dtype.descr[7][0]+'_u.none.model.z0.10',dset4.dtype.descr[7][1])
    d2 = (dset4.dtype.descr[7][0]+'_g.none.model.z0.10',dset4.dtype.descr[7][1])
    d3 = (dset4.dtype.descr[7][0]+'_r.none.model.z0.10',dset4.dtype.descr[7][1])
    d4 = (dset4.dtype.descr[7][0]+'_i.none.model.z0.10',dset4.dtype.descr[7][1])
    d5 = (dset4.dtype.descr[7][0]+'_z.none.model.z0.10',dset4.dtype.descr[7][1])
    d6 = (dset4.dtype.descr[5][0]+'_u.none.model.z0.10',dset4.dtype.descr[5][1])
    d7 = (dset4.dtype.descr[5][0]+'_g.none.model.z0.10',dset4.dtype.descr[5][1])
    d8 = (dset4.dtype.descr[5][0]+'_r.none.model.z0.10',dset4.dtype.descr[5][1])
    d9 = (dset4.dtype.descr[5][0]+'_i.none.model.z0.10',dset4.dtype.descr[5][1])
    d10 = (dset4.dtype.descr[5][0]+'_z.none.model.z0.10',dset4.dtype.descr[5][1])
    #KCORRECT and ABS mags
    d11 = (dset5.dtype.descr[7][0]+'_u.none.petro.z0.10',dset5.dtype.descr[7][1])
    d12 = (dset5.dtype.descr[7][0]+'_g.none.petro.z0.10',dset5.dtype.descr[7][1])
    d13 = (dset5.dtype.descr[7][0]+'_r.none.petro.z0.10',dset5.dtype.descr[7][1])
    d14 = (dset5.dtype.descr[7][0]+'_i.none.petro.z0.10',dset5.dtype.descr[7][1])
    d15 = (dset5.dtype.descr[7][0]+'_z.none.petro.z0.10',dset5.dtype.descr[7][1])
    d16 = (dset5.dtype.descr[5][0]+'_u.none.petro.z0.10',dset5.dtype.descr[5][1])
    d17 = (dset5.dtype.descr[5][0]+'_g.none.petro.z0.10',dset5.dtype.descr[5][1])
    d18 = (dset5.dtype.descr[5][0]+'_r.none.petro.z0.10',dset5.dtype.descr[5][1])
    d19 = (dset5.dtype.descr[5][0]+'_i.none.petro.z0.10',dset5.dtype.descr[5][1])
    d20 = (dset5.dtype.descr[5][0]+'_z.none.petro.z0.10',dset5.dtype.descr[5][1])
    #KCORRECT and ABS mags
    d21 = (dset6.dtype.descr[7][0]+'_u.nearest.model.z0.10',dset6.dtype.descr[7][1])
    d22 = (dset6.dtype.descr[7][0]+'_g.nearest.model.z0.10',dset6.dtype.descr[7][1])
    d23 = (dset6.dtype.descr[7][0]+'_r.nearest.model.z0.10',dset6.dtype.descr[7][1])
    d24 = (dset6.dtype.descr[7][0]+'_i.nearest.model.z0.10',dset6.dtype.descr[7][1])
    d25 = (dset6.dtype.descr[7][0]+'_z.nearest.model.z0.10',dset6.dtype.descr[7][1])
    d26 = (dset6.dtype.descr[5][0]+'_u.nearest.model.z0.10',dset6.dtype.descr[5][1])
    d27 = (dset6.dtype.descr[5][0]+'_g.nearest.model.z0.10',dset6.dtype.descr[5][1])
    d28 = (dset6.dtype.descr[5][0]+'_r.nearest.model.z0.10',dset6.dtype.descr[5][1])
    d29 = (dset6.dtype.descr[5][0]+'_i.nearest.model.z0.10',dset6.dtype.descr[5][1])
    d30 = (dset6.dtype.descr[5][0]+'_z.nearest.model.z0.10',dset6.dtype.descr[5][1])
    #KCORRECT and ABS mags
    d31 = (dset7.dtype.descr[7][0]+'_u.nearest.petro.z0.10',dset7.dtype.descr[7][1])
    d32 = (dset7.dtype.descr[7][0]+'_g.nearest.petro.z0.10',dset7.dtype.descr[7][1])
    d33 = (dset7.dtype.descr[7][0]+'_r.nearest.petro.z0.10',dset7.dtype.descr[7][1])
    d34 = (dset7.dtype.descr[7][0]+'_i.nearest.petro.z0.10',dset7.dtype.descr[7][1])
    d35 = (dset7.dtype.descr[7][0]+'_z.nearest.petro.z0.10',dset7.dtype.descr[7][1])
    d36 = (dset7.dtype.descr[5][0]+'_u.nearest.petro.z0.10',dset7.dtype.descr[5][1])
    d37 = (dset7.dtype.descr[5][0]+'_g.nearest.petro.z0.10',dset7.dtype.descr[5][1])
    d38 = (dset7.dtype.descr[5][0]+'_r.nearest.petro.z0.10',dset7.dtype.descr[5][1])
    d39 = (dset7.dtype.descr[5][0]+'_i.nearest.petro.z0.10',dset7.dtype.descr[5][1])
    d40 = (dset7.dtype.descr[5][0]+'_z.nearest.petro.z0.10',dset7.dtype.descr[5][1])
    #adding in the k-correct to z=0 files on 01/14/2015
    #KCORRECT and ABS mags
    d1_00 = (dset11.dtype.descr[7][0]+'_u.none.model.z0.00',dset4.dtype.descr[7][1])
    d2_00 = (dset11.dtype.descr[7][0]+'_g.none.model.z0.00',dset4.dtype.descr[7][1])
    d3_00 = (dset11.dtype.descr[7][0]+'_r.none.model.z0.00',dset4.dtype.descr[7][1])
    d4_00 = (dset11.dtype.descr[7][0]+'_i.none.model.z0.00',dset4.dtype.descr[7][1])
    d5_00 = (dset11.dtype.descr[7][0]+'_z.none.model.z0.00',dset4.dtype.descr[7][1])
    d6_00 = (dset11.dtype.descr[5][0]+'_u.none.model.z0.00',dset4.dtype.descr[5][1])
    d7_00 = (dset11.dtype.descr[5][0]+'_g.none.model.z0.00',dset4.dtype.descr[5][1])
    d8_00 = (dset11.dtype.descr[5][0]+'_r.none.model.z0.00',dset4.dtype.descr[5][1])
    d9_00 = (dset11.dtype.descr[5][0]+'_i.none.model.z0.00',dset4.dtype.descr[5][1])
    d10_00 = (dset11.dtype.descr[5][0]+'_z.none.model.z0.00',dset4.dtype.descr[5][1])
    #KCORRECT and ABS mags
    d11_00 = (dset12.dtype.descr[7][0]+'_u.none.petro.z0.00',dset5.dtype.descr[7][1])
    d12_00 = (dset12.dtype.descr[7][0]+'_g.none.petro.z0.00',dset5.dtype.descr[7][1])
    d13_00 = (dset12.dtype.descr[7][0]+'_r.none.petro.z0.00',dset5.dtype.descr[7][1])
    d14_00 = (dset12.dtype.descr[7][0]+'_i.none.petro.z0.00',dset5.dtype.descr[7][1])
    d15_00 = (dset12.dtype.descr[7][0]+'_z.none.petro.z0.00',dset5.dtype.descr[7][1])
    d16_00 = (dset12.dtype.descr[5][0]+'_u.none.petro.z0.00',dset5.dtype.descr[5][1])
    d17_00 = (dset12.dtype.descr[5][0]+'_g.none.petro.z0.00',dset5.dtype.descr[5][1])
    d18_00 = (dset12.dtype.descr[5][0]+'_r.none.petro.z0.00',dset5.dtype.descr[5][1])
    d19_00 = (dset12.dtype.descr[5][0]+'_i.none.petro.z0.00',dset5.dtype.descr[5][1])
    d20_00 = (dset12.dtype.descr[5][0]+'_z.none.petro.z0.00',dset5.dtype.descr[5][1])
    #KCORRECT and ABS mags
    d21_00 = (dset13.dtype.descr[7][0]+'_u.nearest.model.z0.00',dset6.dtype.descr[7][1])
    d22_00 = (dset13.dtype.descr[7][0]+'_g.nearest.model.z0.00',dset6.dtype.descr[7][1])
    d23_00 = (dset13.dtype.descr[7][0]+'_r.nearest.model.z0.00',dset6.dtype.descr[7][1])
    d24_00 = (dset13.dtype.descr[7][0]+'_i.nearest.model.z0.00',dset6.dtype.descr[7][1])
    d25_00 = (dset13.dtype.descr[7][0]+'_z.nearest.model.z0.00',dset6.dtype.descr[7][1])
    d26_00 = (dset13.dtype.descr[5][0]+'_u.nearest.model.z0.00',dset6.dtype.descr[5][1])
    d27_00 = (dset13.dtype.descr[5][0]+'_g.nearest.model.z0.00',dset6.dtype.descr[5][1])
    d28_00 = (dset13.dtype.descr[5][0]+'_r.nearest.model.z0.00',dset6.dtype.descr[5][1])
    d29_00 = (dset13.dtype.descr[5][0]+'_i.nearest.model.z0.00',dset6.dtype.descr[5][1])
    d30_00 = (dset13.dtype.descr[5][0]+'_z.nearest.model.z0.00',dset6.dtype.descr[5][1])
    #KCORRECT and ABS mags
    d31_00 = (dset14.dtype.descr[7][0]+'_u.nearest.petro.z0.00',dset7.dtype.descr[7][1])
    d32_00 = (dset14.dtype.descr[7][0]+'_g.nearest.petro.z0.00',dset7.dtype.descr[7][1])
    d33_00 = (dset14.dtype.descr[7][0]+'_r.nearest.petro.z0.00',dset7.dtype.descr[7][1])
    d34_00 = (dset14.dtype.descr[7][0]+'_i.nearest.petro.z0.00',dset7.dtype.descr[7][1])
    d35_00 = (dset14.dtype.descr[7][0]+'_z.nearest.petro.z0.00',dset7.dtype.descr[7][1])
    d36_00 = (dset14.dtype.descr[5][0]+'_u.nearest.petro.z0.00',dset7.dtype.descr[5][1])
    d37_00 = (dset14.dtype.descr[5][0]+'_g.nearest.petro.z0.00',dset7.dtype.descr[5][1])
    d38_00 = (dset14.dtype.descr[5][0]+'_r.nearest.petro.z0.00',dset7.dtype.descr[5][1])
    # NOTE(review): d39_00/d40_00 use dset7 where the pattern suggests dset14
    # was intended; harmless only if both dtypes share the same field name at
    # descr[5] — confirm.
    d39_00 = (dset7.dtype.descr[5][0]+'_i.nearest.petro.z0.00',dset7.dtype.descr[5][1])
    d40_00 = (dset7.dtype.descr[5][0]+'_z.nearest.petro.z0.00',dset7.dtype.descr[5][1])
    # sersic index column (r-band)
    d_sersic = (dset8.dtype.descr[2][0]+'_r',dset7.dtype.descr[5][1])

    #adding in the k-correct to z=0 files on 01/14/2015
    # assemble the combined output dtype: id/ra/dec, redshift info,
    # velocity dispersion, spectro tag, sersic index, then all per-band
    # k-correct columns
    dtype = np.dtype([dset1_descr[0],dset1_descr[1],dset1_descr[2],dset10_descr[3],dset3_descr[28],\
                      dset10_descr[9],dset10_descr[7],dset3_descr[37],dset3_descr[38],dset3_descr[0],d_sersic,\
                      d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18,d19,d20,\
                      d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31,d32,d33,d34,d35,d36,d37,d38,d39,d40,\
                      d1_00,d2_00,d3_00,d4_00,d5_00,d6_00,d7_00,d8_00,d9_00,d10_00,\
                      d11_00,d12_00,d13_00,d14_00,d15_00,d16_00,d17_00,d18_00,d19_00,d20_00,\
                      d21_00,d22_00,d23_00,d24_00,d25_00,d26_00,d27_00,d28_00,d29_00,d30_00,\
                      d31_00,d32_00,d33_00,d34_00,d35_00,d36_00,d37_00,d38_00,d39_00,d40_00])
    N_columns = len(dtype)

    # print the final column layout for inspection
    i=0
    print ' '
    for name in dtype.descr:
        print i, name
        i=i+1

    data = np.recarray((len(dset1), ), dtype=dtype)
    data[data.dtype.descr[0][0]] = dset1[data.dtype.descr[0][0]] #ID
    data[data.dtype.descr[1][0]] = dset1[data.dtype.descr[1][0]] #ra
    data[data.dtype.descr[2][0]] = dset1[data.dtype.descr[2][0]] #dec
    data[data.dtype.descr[3][0]] = dset10[data.dtype.descr[3][0]] #z
    data[data.dtype.descr[4][0]] = dset3[data.dtype.descr[4][0]] #zerr
    data[data.dtype.descr[5][0]] = dset10[data.dtype.descr[5][0]] #ztype
    data[data.dtype.descr[6][0]] = dset10[data.dtype.descr[6][0]] #neighbor used
    data[data.dtype.descr[7][0]] = dset3[data.dtype.descr[7][0]] #vdisp
    data[data.dtype.descr[8][0]] = dset3[data.dtype.descr[8][0]] #vdisperr
    data[data.dtype.descr[9][0]] = dset3[data.dtype.descr[9][0]] #SDSS_SPECTRO_TAG

    # transpose the per-object sersic array so band rows can be indexed
    x=np.column_stack(dset8['SERSIC_N'])
    data[data.dtype.descr[10][0]] = x[0] #N_sersic r_band

    # for each k-correct variant: transpose KCORRECT/ABSMAG into per-band
    # rows, then copy band i into the matching output column (+4 offsets the
    # column numbering in the comments)
    y=np.column_stack(dset4['KCORRECT'])
    x=np.column_stack(dset4['ABSMAG'])
    for i in range(0,5): #7-11 + 3 etc...
        print i, data.dtype.descr[i+7+4][0]
        data[data.dtype.descr[i+7+4][0]] = y[i]
    for i in range(0,5): #12-16
        print i, data.dtype.descr[i+12+4]
        data[data.dtype.descr[i+12+4][0]] = x[i]

    y=np.column_stack(dset5['KCORRECT'])
    x=np.column_stack(dset5['ABSMAG'])
    for i in range(0,5): #17-21
        print i, data.dtype.descr[i+17+4]
        data[data.dtype.descr[i+17+4][0]] = y[i]
    for i in range(0,5): #22-26
        print i, data.dtype.descr[i+22+4]
        data[data.dtype.descr[i+22+4][0]] = x[i]

    y=np.column_stack(dset6['KCORRECT'])
    x=np.column_stack(dset6['ABSMAG'])
    for i in range(0,5): #27-31
        print i, data.dtype.descr[i+27+4]
        data[data.dtype.descr[i+27+4][0]] = y[i]
    for i in range(0,5): #32-36
        print i, data.dtype.descr[i+32+4]
        data[data.dtype.descr[i+32+4][0]] = x[i]

    y=np.column_stack(dset7['KCORRECT'])
    x=np.column_stack(dset7['ABSMAG'])
    for i in range(0,5): #37-41
        print i, data.dtype.descr[i+37+4]
        data[data.dtype.descr[i+37+4][0]] = y[i]
    for i in range(0,5): #42-46
        print i, data.dtype.descr[i+42+4]
        data[data.dtype.descr[i+42+4][0]] = x[i]

    #adding in the k-correct to z=0 files on 01/14/2015
    y=np.column_stack(dset11['KCORRECT'])
    x=np.column_stack(dset11['ABSMAG'])
    for i in range(0,5): #47-51 + 3 etc...
        print i, data.dtype.descr[i+47+4][0]
        data[data.dtype.descr[i+47+4][0]] = y[i]
    for i in range(0,5): #52-56
        print i, data.dtype.descr[i+52+4]
        data[data.dtype.descr[i+52+4][0]] = x[i]

    y=np.column_stack(dset12['KCORRECT'])
    x=np.column_stack(dset12['ABSMAG'])
    for i in range(0,5): #57-61
        print i, data.dtype.descr[i+57+4]
        data[data.dtype.descr[i+57+4][0]] = y[i]
    for i in range(0,5): #62-66
        print i, data.dtype.descr[i+62+4]
        data[data.dtype.descr[i+62+4][0]] = x[i]

    y=np.column_stack(dset13['KCORRECT'])
    x=np.column_stack(dset13['ABSMAG'])
    for i in range(0,5): #67-71
        print i, data.dtype.descr[i+67+4]
        data[data.dtype.descr[i+67+4][0]] = y[i]
    for i in range(0,5): #72-76
        print i, data.dtype.descr[i+72+4]
        data[data.dtype.descr[i+72+4][0]] = x[i]

    y=np.column_stack(dset14['KCORRECT'])
    x=np.column_stack(dset14['ABSMAG'])
    for i in range(0,5): #77-81
        print i, data.dtype.descr[i+77+4]
        data[data.dtype.descr[i+77+4][0]] = y[i]
    for i in range(0,5): #82-86
        print i, data.dtype.descr[i+82+4]
        data[data.dtype.descr[i+82+4][0]] = x[i]

    #save the resultant table
    filename = 'nyu_vagc_dr7'
    f = h5py.File(savepath+filename+'.hdf5', 'w')
    dset = f.create_dataset(filename, data=data)
    f.close()
def main(): filepath = cu.get_data_path()+'Tinker_groupcat/mock_runs/4th_run/' savepath = cu.get_output_path()+'processed_data/tinker_groupcat/mock_runs/4th_run/' ############################################################################################# i = int(sys.argv[1]) catalogues = ['clf_groups_M19_1','clf_groups_M19_2','clf_groups_M19_3','clf_groups_M19_4',\ 'clf_groups_M19_5','clf_groups_M19_6','clf_groups_M19_7'] mocks = ['Mr19_age_distribution_matching_mock_sys_empty_shuffle_satsys_shuffle',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle_cen_shuffle',\ 'Mr19_age_distribution_matching_mock_sys_empty_shuffle',\ 'Mr19_age_distribution_matching_mock',\ 'Mr19_age_distribution_matching_mock_satsys_shuffle',\ 'Mr19_age_distribution_matching_mock_cen_shuffle'] catalogue = catalogues[i] mock = mocks[i] ############################################################################################# #open group files names=['foo1','group_id','cen_id','group_mass','group_mass_prev','n_sat','l_tot',\ 'l_central','foo2','cen_ra','cen_dec','cen_cz','foo3','foo4'] filename = catalogue+'.groups' groups = ascii.read(filepath+filename, delimiter='\s', names=names, \ data_start=0,format='no_header') groups = np.array(groups) print 'number of groups:', len(groups) #open the satellite probability files names = ['foo1','gal_id','group_id','cen_id','M_r','p_sat','lum','foo2','foo3','foo4',\ 'R_proj','d_gal','da_halo'] filename = catalogue+'.prob' prob = ascii.read(filepath+filename, delimiter='\s', names=names, \ data_start=0,format='no_header') prob = np.array(prob) print 'nuber of galaxies:', len(prob) print 'number of satellites:', len(np.where(prob['p_sat']>0.5)[0]) #open the index files names = ['foo1','ind','M_r'] filename = catalogue+'.indx' indx = ascii.read(filepath+filename, delimiter='\s', names=names, \ data_start=0,format='no_header') indx = np.array(indx) print 'number of galaxies:', 
len(indx) ############################################################################################# #open the radec mock filename = mock+'_radec_mock.dat' filepath = cu.get_output_path()+'processed_data/hearin_mocks/custom_catalogues/' radec_mock = ascii.read(filepath+filename, delimiter='\s', Reader=ascii.Basic) #open the full mock filename = mock+'.hdf5' filepath = cu.get_output_path()+'processed_data/hearin_mocks/custom_catalogues/' f = h5py.File(filepath+filename, 'r') full_mock = f.get(mock) print full_mock.dtype.names ############################################################################################# ind_full_mock = radec_mock['k'][indx['ind']-1] #create output file with galaxy ID and group ID dtype=[('gal_ID','>i8'),('group_ID','>i8')] dtype=np.dtype(dtype) data = np.recarray((len(indx),), dtype=dtype) #fill in data structure. data['gal_ID'] = full_mock['ID_halo'][ind_full_mock] data['group_ID'] = prob['group_id'] #save catalogue = mock print 'saving:', savepath+catalogue+'_radec_mock.hdf5' f = h5py.File(savepath+catalogue+'_radec_mock.hdf5', 'w') dset = f.create_dataset(catalogue+'_radec_mock', data=data) f.close() print 'done.'
def main():
    """
    Convert the three MPA-JHU DR7 FITS catalogues to HDF5, then build
    pairwise merged tables and one combined master catalogue ('mpa_dr7').
    Assumes the three files are row-aligned (same object per row) --
    merging is done by position, not by ID.  TODO confirm row alignment.
    """
    filepath = cu.get_data_path() + 'mpa_DR7_catalogue/'
    savepath = cu.get_output_path() + 'processed_data/mpa_dr7/'
    #################################################################
    catalogues=['gal_totspecsfr_dr7_v5_2.fits','gal_info_dr7_v5_2.fits','totlgm_dr7_v5_2.fits']
    # SFR catalogue -> HDF5 ([:-5] strips the '.fits' extension)
    filename = catalogues[0]
    hdulist1 = fits.open(filepath+filename, memmap=True)
    data1 = hdulist1[1].data
    print 'saving as:', savepath+filename[:-5]+'.hdf5'
    f1 = h5py.File(savepath+filename[:-5]+'.hdf5', 'w')
    dset1 = f1.create_dataset(filename[:-5], data=data1)
    # galaxy info catalogue -> HDF5
    filename = catalogues[1]
    hdulist2 = fits.open(filepath+filename, memmap=True)
    data2 = hdulist2[1].data
    print 'saving as:', savepath+filename[:-5]+'.hdf5'
    f2 = h5py.File(savepath+filename[:-5]+'.hdf5', 'w')
    dset2 = f2.create_dataset(filename[:-5], data=data2)
    # stellar mass catalogue -> HDF5
    filename = catalogues[2]
    hdulist3 = fits.open(filepath+filename, memmap=True)
    data3 = hdulist3[1].data
    print 'saving as:', savepath+filename[:-5]+'.hdf5'
    f3 = h5py.File(savepath+filename[:-5]+'.hdf5', 'w')
    dset3 = f3.create_dataset(filename[:-5], data=data3)
    # merge info + SFR column-wise into one record array
    dtype1 = dset1.dtype.descr
    dtype2 = dset2.dtype.descr
    dtype12 = dtype2+dtype1
    dtype12 = np.dtype(dtype12)
    print dtype12
    print len(dset1), len(dset2)
    data12 = np.recarray((len(dset2),), dtype=dtype12)
    for name in dset2.dtype.descr:
        name = name[0]
        print name
        data12[name]=dset2[name]
    for name in dset1.dtype.descr:
        name = name[0]
        print name
        data12[name]=dset1[name]
    filename = 'gal_info_gal_totspecsfr_dr7_v5_2'
    print savepath+filename+'.hdf5'
    f12 = h5py.File(savepath+filename+'.hdf5', 'w')
    dset12 = f12.create_dataset(filename, data=data12)
    # merge info + stellar mass the same way
    dtype3 = dset3.dtype.descr
    dtype2 = dset2.dtype.descr
    print dtype3
    print dtype2
    dtype32 = dtype2+dtype3
    dtype32 = np.dtype(dtype32)
    print dtype32
    print len(dset3), len(dset2)
    data32 = np.recarray((len(dset2),), dtype=dtype32)
    for name in dset2.dtype.descr:
        name = name[0]
        print name
        data32[name]=dset2[name]
    for name in dset3.dtype.descr:
        name = name[0]
        print name
        data32[name]=dset3[name]
    filename = 'gal_info_totlgm_dr7_v5_2'
    print savepath+filename+'.hdf5'
    f32 = h5py.File(savepath+filename+'.hdf5', 'w')
    dset32 = f32.create_dataset(filename, data=data32)
    print "making combined master catalogue"
    #alter column names for these two
    # prefix SFR columns with 'sfr_' and stellar-mass columns with 'sm_'
    # so the three catalogues' column names cannot collide in the master table
    dtype1_c = []
    dtype3_c = []
    for i in range(len(dtype1)):
        dtype1_c.append(('sfr_'+dtype1[i][0],dtype1[i][1]))
    for i in range(len(dtype3)):
        dtype3_c.append(('sm_'+dtype3[i][0], dtype3[i][1]))
    # NOTE: dtype1/dtype3 are rebound here from descr lists to np.dtype objects;
    # the enumerate loops below rely on this exact ordering
    dtype1 = np.dtype(dtype1_c)
    dtype3 = np.dtype(dtype3_c)
    dtype123 = dtype2 + dtype3_c + dtype1_c
    dtype123 = np.dtype(dtype123)
    print dtype123
    print len(dset3), len(dset2), len(dset1)
    data123 = np.recarray((len(dset2),), dtype=dtype123)
    for name in dset2.dtype.descr:
        name = name[0]
        print name
        data123[name]=dset2[name]
    # copy SFR columns: renamed ('sfr_*') destination <- original source column,
    # matched positionally
    original_descr = dset1.dtype.descr
    for i,name in enumerate(dtype1.descr):
        name = name[0]
        original_name = original_descr[i][0]
        print name, original_name
        data123[name]=dset1[original_name]
    # copy stellar-mass columns ('sm_*') the same way
    original_descr = dset3.dtype.descr
    for i,name in enumerate(dtype3.descr):
        name = name[0]
        original_name = original_descr[i][0]
        print name, original_name
        data123[name]=dset3[original_name]
    filename = 'mpa_dr7'
    print savepath+filename+'.hdf5'
    f123 = h5py.File(savepath+filename+'.hdf5', 'w')
    dset123 = f123.create_dataset(filename, data=data123)
    # NOTE(review): none of the HDF5 handles (f1,f2,f3,f12,f32,f123) or FITS
    # hdulists are closed here -- output relies on interpreter exit to flush;
    # consider explicit close() calls.
def main(): filepath = cu.get_data_path() + 'NYU_VAGC/' savepath = cu.get_output_path() + 'processed_data/NYU_VAGC/' filenames = os.listdir('/scratch/dac29/data/NYU_VAGC/') filenames = fnmatch.filter(filenames, '*.fits') #turn each catalogue into an hdf5 file for filename in filenames: print 'reading in:', filename hdulist = fits.open(filepath + filename) data = hdulist[1].data print 'saving as:', filename[:-5] + '.hdf5' f = h5py.File(savepath + filename[:-5] + '.hdf5', 'w') dset = f.create_dataset(filename[:-5], data=data) f.close() gc.collect() print 'done.' #adding in the k-correct to z=0 files on 01/14/2015 catalogues = ['kcorrect.none.model.z0.10','kcorrect.nearest.petro.z0.10',\ 'kcorrect.nearest.model.z0.10','kcorrect.none.petro.z0.10',\ 'object_catalog','object_sdss_imaging','object_sdss_spectro',\ 'sersic_catalog', 'collisions.none', 'collisions.nearest',\ 'kcorrect.none.model.z0.00','kcorrect.nearest.petro.z0.00',\ 'kcorrect.nearest.model.z0.00','kcorrect.none.petro.z0.00'] catalogue = 'object_catalog' f1 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset1 = f1.get(catalogue) catalogue = 'object_sdss_imaging' f2 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset2 = f2.get(catalogue) catalogue = 'object_sdss_spectro' f3 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset3 = f3.get(catalogue) catalogue = 'kcorrect.none.model.z0.10' f4 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset4 = f4.get(catalogue) catalogue = 'kcorrect.none.petro.z0.10' f5 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset5 = f5.get(catalogue) catalogue = 'kcorrect.nearest.model.z0.10' f6 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset6 = f6.get(catalogue) catalogue = 'kcorrect.nearest.petro.z0.10' f7 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset7 = f7.get(catalogue) catalogue = 'sersic_catalog' f8 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset8 = f8.get(catalogue) catalogue = 'collisions.none' f9 = h5py.File(savepath + catalogue + '.hdf5', 'r') 
dset9 = f9.get(catalogue) catalogue = 'collisions.nearest' f10 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset10 = f10.get(catalogue) #adding in the k-correct to z=0 files on 01/14/2015 catalogue = 'kcorrect.none.model.z0.00' f11 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset11 = f11.get(catalogue) catalogue = 'kcorrect.none.petro.z0.00' f12 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset12 = f12.get(catalogue) catalogue = 'kcorrect.nearest.model.z0.00' f13 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset13 = f13.get(catalogue) catalogue = 'kcorrect.nearest.petro.z0.00' f14 = h5py.File(savepath + catalogue + '.hdf5', 'r') dset14 = f14.get(catalogue) dset1_descr = dset1.dtype.descr dset2_descr = dset2.dtype.descr dset3_descr = dset3.dtype.descr dset4_descr = dset4.dtype.descr dset5_descr = dset5.dtype.descr dset6_descr = dset6.dtype.descr dset7_descr = dset7.dtype.descr dset8_descr = dset8.dtype.descr dset9_descr = dset9.dtype.descr #adding in the k-correct to z=0 files on 01/14/2015 dset10_descr = dset10.dtype.descr dset11_descr = dset11.dtype.descr dset12_descr = dset12.dtype.descr dset13_descr = dset13.dtype.descr dset14_descr = dset14.dtype.descr #KCORRECT and ABS mags d1 = (dset4.dtype.descr[7][0] + '_u.none.model.z0.10', dset4.dtype.descr[7][1]) d2 = (dset4.dtype.descr[7][0] + '_g.none.model.z0.10', dset4.dtype.descr[7][1]) d3 = (dset4.dtype.descr[7][0] + '_r.none.model.z0.10', dset4.dtype.descr[7][1]) d4 = (dset4.dtype.descr[7][0] + '_i.none.model.z0.10', dset4.dtype.descr[7][1]) d5 = (dset4.dtype.descr[7][0] + '_z.none.model.z0.10', dset4.dtype.descr[7][1]) d6 = (dset4.dtype.descr[5][0] + '_u.none.model.z0.10', dset4.dtype.descr[5][1]) d7 = (dset4.dtype.descr[5][0] + '_g.none.model.z0.10', dset4.dtype.descr[5][1]) d8 = (dset4.dtype.descr[5][0] + '_r.none.model.z0.10', dset4.dtype.descr[5][1]) d9 = (dset4.dtype.descr[5][0] + '_i.none.model.z0.10', dset4.dtype.descr[5][1]) d10 = (dset4.dtype.descr[5][0] + '_z.none.model.z0.10', 
dset4.dtype.descr[5][1]) #KCORRECT and ABS mags d11 = (dset5.dtype.descr[7][0] + '_u.none.petro.z0.10', dset5.dtype.descr[7][1]) d12 = (dset5.dtype.descr[7][0] + '_g.none.petro.z0.10', dset5.dtype.descr[7][1]) d13 = (dset5.dtype.descr[7][0] + '_r.none.petro.z0.10', dset5.dtype.descr[7][1]) d14 = (dset5.dtype.descr[7][0] + '_i.none.petro.z0.10', dset5.dtype.descr[7][1]) d15 = (dset5.dtype.descr[7][0] + '_z.none.petro.z0.10', dset5.dtype.descr[7][1]) d16 = (dset5.dtype.descr[5][0] + '_u.none.petro.z0.10', dset5.dtype.descr[5][1]) d17 = (dset5.dtype.descr[5][0] + '_g.none.petro.z0.10', dset5.dtype.descr[5][1]) d18 = (dset5.dtype.descr[5][0] + '_r.none.petro.z0.10', dset5.dtype.descr[5][1]) d19 = (dset5.dtype.descr[5][0] + '_i.none.petro.z0.10', dset5.dtype.descr[5][1]) d20 = (dset5.dtype.descr[5][0] + '_z.none.petro.z0.10', dset5.dtype.descr[5][1]) #KCORRECT and ABS mags d21 = (dset6.dtype.descr[7][0] + '_u.nearest.model.z0.10', dset6.dtype.descr[7][1]) d22 = (dset6.dtype.descr[7][0] + '_g.nearest.model.z0.10', dset6.dtype.descr[7][1]) d23 = (dset6.dtype.descr[7][0] + '_r.nearest.model.z0.10', dset6.dtype.descr[7][1]) d24 = (dset6.dtype.descr[7][0] + '_i.nearest.model.z0.10', dset6.dtype.descr[7][1]) d25 = (dset6.dtype.descr[7][0] + '_z.nearest.model.z0.10', dset6.dtype.descr[7][1]) d26 = (dset6.dtype.descr[5][0] + '_u.nearest.model.z0.10', dset6.dtype.descr[5][1]) d27 = (dset6.dtype.descr[5][0] + '_g.nearest.model.z0.10', dset6.dtype.descr[5][1]) d28 = (dset6.dtype.descr[5][0] + '_r.nearest.model.z0.10', dset6.dtype.descr[5][1]) d29 = (dset6.dtype.descr[5][0] + '_i.nearest.model.z0.10', dset6.dtype.descr[5][1]) d30 = (dset6.dtype.descr[5][0] + '_z.nearest.model.z0.10', dset6.dtype.descr[5][1]) #KCORRECT and ABS mags d31 = (dset7.dtype.descr[7][0] + '_u.nearest.petro.z0.10', dset7.dtype.descr[7][1]) d32 = (dset7.dtype.descr[7][0] + '_g.nearest.petro.z0.10', dset7.dtype.descr[7][1]) d33 = (dset7.dtype.descr[7][0] + '_r.nearest.petro.z0.10', dset7.dtype.descr[7][1]) d34 
= (dset7.dtype.descr[7][0] + '_i.nearest.petro.z0.10', dset7.dtype.descr[7][1]) d35 = (dset7.dtype.descr[7][0] + '_z.nearest.petro.z0.10', dset7.dtype.descr[7][1]) d36 = (dset7.dtype.descr[5][0] + '_u.nearest.petro.z0.10', dset7.dtype.descr[5][1]) d37 = (dset7.dtype.descr[5][0] + '_g.nearest.petro.z0.10', dset7.dtype.descr[5][1]) d38 = (dset7.dtype.descr[5][0] + '_r.nearest.petro.z0.10', dset7.dtype.descr[5][1]) d39 = (dset7.dtype.descr[5][0] + '_i.nearest.petro.z0.10', dset7.dtype.descr[5][1]) d40 = (dset7.dtype.descr[5][0] + '_z.nearest.petro.z0.10', dset7.dtype.descr[5][1]) #adding in the k-correct to z=0 files on 01/14/2015 #KCORRECT and ABS mags d1_00 = (dset11.dtype.descr[7][0] + '_u.none.model.z0.00', dset4.dtype.descr[7][1]) d2_00 = (dset11.dtype.descr[7][0] + '_g.none.model.z0.00', dset4.dtype.descr[7][1]) d3_00 = (dset11.dtype.descr[7][0] + '_r.none.model.z0.00', dset4.dtype.descr[7][1]) d4_00 = (dset11.dtype.descr[7][0] + '_i.none.model.z0.00', dset4.dtype.descr[7][1]) d5_00 = (dset11.dtype.descr[7][0] + '_z.none.model.z0.00', dset4.dtype.descr[7][1]) d6_00 = (dset11.dtype.descr[5][0] + '_u.none.model.z0.00', dset4.dtype.descr[5][1]) d7_00 = (dset11.dtype.descr[5][0] + '_g.none.model.z0.00', dset4.dtype.descr[5][1]) d8_00 = (dset11.dtype.descr[5][0] + '_r.none.model.z0.00', dset4.dtype.descr[5][1]) d9_00 = (dset11.dtype.descr[5][0] + '_i.none.model.z0.00', dset4.dtype.descr[5][1]) d10_00 = (dset11.dtype.descr[5][0] + '_z.none.model.z0.00', dset4.dtype.descr[5][1]) #KCORRECT and ABS mags d11_00 = (dset12.dtype.descr[7][0] + '_u.none.petro.z0.00', dset5.dtype.descr[7][1]) d12_00 = (dset12.dtype.descr[7][0] + '_g.none.petro.z0.00', dset5.dtype.descr[7][1]) d13_00 = (dset12.dtype.descr[7][0] + '_r.none.petro.z0.00', dset5.dtype.descr[7][1]) d14_00 = (dset12.dtype.descr[7][0] + '_i.none.petro.z0.00', dset5.dtype.descr[7][1]) d15_00 = (dset12.dtype.descr[7][0] + '_z.none.petro.z0.00', dset5.dtype.descr[7][1]) d16_00 = (dset12.dtype.descr[5][0] + 
'_u.none.petro.z0.00', dset5.dtype.descr[5][1]) d17_00 = (dset12.dtype.descr[5][0] + '_g.none.petro.z0.00', dset5.dtype.descr[5][1]) d18_00 = (dset12.dtype.descr[5][0] + '_r.none.petro.z0.00', dset5.dtype.descr[5][1]) d19_00 = (dset12.dtype.descr[5][0] + '_i.none.petro.z0.00', dset5.dtype.descr[5][1]) d20_00 = (dset12.dtype.descr[5][0] + '_z.none.petro.z0.00', dset5.dtype.descr[5][1]) #KCORRECT and ABS mags d21_00 = (dset13.dtype.descr[7][0] + '_u.nearest.model.z0.00', dset6.dtype.descr[7][1]) d22_00 = (dset13.dtype.descr[7][0] + '_g.nearest.model.z0.00', dset6.dtype.descr[7][1]) d23_00 = (dset13.dtype.descr[7][0] + '_r.nearest.model.z0.00', dset6.dtype.descr[7][1]) d24_00 = (dset13.dtype.descr[7][0] + '_i.nearest.model.z0.00', dset6.dtype.descr[7][1]) d25_00 = (dset13.dtype.descr[7][0] + '_z.nearest.model.z0.00', dset6.dtype.descr[7][1]) d26_00 = (dset13.dtype.descr[5][0] + '_u.nearest.model.z0.00', dset6.dtype.descr[5][1]) d27_00 = (dset13.dtype.descr[5][0] + '_g.nearest.model.z0.00', dset6.dtype.descr[5][1]) d28_00 = (dset13.dtype.descr[5][0] + '_r.nearest.model.z0.00', dset6.dtype.descr[5][1]) d29_00 = (dset13.dtype.descr[5][0] + '_i.nearest.model.z0.00', dset6.dtype.descr[5][1]) d30_00 = (dset13.dtype.descr[5][0] + '_z.nearest.model.z0.00', dset6.dtype.descr[5][1]) #KCORRECT and ABS mags d31_00 = (dset14.dtype.descr[7][0] + '_u.nearest.petro.z0.00', dset7.dtype.descr[7][1]) d32_00 = (dset14.dtype.descr[7][0] + '_g.nearest.petro.z0.00', dset7.dtype.descr[7][1]) d33_00 = (dset14.dtype.descr[7][0] + '_r.nearest.petro.z0.00', dset7.dtype.descr[7][1]) d34_00 = (dset14.dtype.descr[7][0] + '_i.nearest.petro.z0.00', dset7.dtype.descr[7][1]) d35_00 = (dset14.dtype.descr[7][0] + '_z.nearest.petro.z0.00', dset7.dtype.descr[7][1]) d36_00 = (dset14.dtype.descr[5][0] + '_u.nearest.petro.z0.00', dset7.dtype.descr[5][1]) d37_00 = (dset14.dtype.descr[5][0] + '_g.nearest.petro.z0.00', dset7.dtype.descr[5][1]) d38_00 = (dset14.dtype.descr[5][0] + '_r.nearest.petro.z0.00', 
dset7.dtype.descr[5][1]) d39_00 = (dset7.dtype.descr[5][0] + '_i.nearest.petro.z0.00', dset7.dtype.descr[5][1]) d40_00 = (dset7.dtype.descr[5][0] + '_z.nearest.petro.z0.00', dset7.dtype.descr[5][1]) d_sersic = (dset8.dtype.descr[2][0] + '_r', dset7.dtype.descr[5][1]) #adding in the k-correct to z=0 files on 01/14/2015 dtype = np.dtype([dset1_descr[0],dset1_descr[1],dset1_descr[2],dset10_descr[3],dset3_descr[28],\ dset10_descr[9],dset10_descr[7],dset3_descr[37],dset3_descr[38],dset3_descr[0],d_sersic,\ d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18,d19,d20,\ d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31,d32,d33,d34,d35,d36,d37,d38,d39,d40,\ d1_00,d2_00,d3_00,d4_00,d5_00,d6_00,d7_00,d8_00,d9_00,d10_00,\ d11_00,d12_00,d13_00,d14_00,d15_00,d16_00,d17_00,d18_00,d19_00,d20_00,\ d21_00,d22_00,d23_00,d24_00,d25_00,d26_00,d27_00,d28_00,d29_00,d30_00,\ d31_00,d32_00,d33_00,d34_00,d35_00,d36_00,d37_00,d38_00,d39_00,d40_00]) N_columns = len(dtype) i = 0 print ' ' for name in dtype.descr: print i, name i = i + 1 data = np.recarray((len(dset1), ), dtype=dtype) data[data.dtype.descr[0][0]] = dset1[data.dtype.descr[0][0]] #ID data[data.dtype.descr[1][0]] = dset1[data.dtype.descr[1][0]] #ra data[data.dtype.descr[2][0]] = dset1[data.dtype.descr[2][0]] #dec data[data.dtype.descr[3][0]] = dset10[data.dtype.descr[3][0]] #z data[data.dtype.descr[4][0]] = dset3[data.dtype.descr[4][0]] #zerr data[data.dtype.descr[5][0]] = dset10[data.dtype.descr[5][0]] #ztype data[data.dtype.descr[6][0]] = dset10[data.dtype.descr[6] [0]] #neighbor used data[data.dtype.descr[7][0]] = dset3[data.dtype.descr[7][0]] #vdisp data[data.dtype.descr[8][0]] = dset3[data.dtype.descr[8][0]] #vdisperr data[data.dtype.descr[9][0]] = dset3[data.dtype.descr[9] [0]] #SDSS_SPECTRO_TAG x = np.column_stack(dset8['SERSIC_N']) data[data.dtype.descr[10][0]] = x[0] #N_sersic r_band y = np.column_stack(dset4['KCORRECT']) x = np.column_stack(dset4['ABSMAG']) for i in range(0, 5): #7-11 + 3 etc... 
print i, data.dtype.descr[i + 7 + 4][0] data[data.dtype.descr[i + 7 + 4][0]] = y[i] for i in range(0, 5): #12-16 print i, data.dtype.descr[i + 12 + 4] data[data.dtype.descr[i + 12 + 4][0]] = x[i] y = np.column_stack(dset5['KCORRECT']) x = np.column_stack(dset5['ABSMAG']) for i in range(0, 5): #17-21 print i, data.dtype.descr[i + 17 + 4] data[data.dtype.descr[i + 17 + 4][0]] = y[i] for i in range(0, 5): #22-26 print i, data.dtype.descr[i + 22 + 4] data[data.dtype.descr[i + 22 + 4][0]] = x[i] y = np.column_stack(dset6['KCORRECT']) x = np.column_stack(dset6['ABSMAG']) for i in range(0, 5): #27-31 print i, data.dtype.descr[i + 27 + 4] data[data.dtype.descr[i + 27 + 4][0]] = y[i] for i in range(0, 5): #32-36 print i, data.dtype.descr[i + 32 + 4] data[data.dtype.descr[i + 32 + 4][0]] = x[i] y = np.column_stack(dset7['KCORRECT']) x = np.column_stack(dset7['ABSMAG']) for i in range(0, 5): #37-41 print i, data.dtype.descr[i + 37 + 4] data[data.dtype.descr[i + 37 + 4][0]] = y[i] for i in range(0, 5): #42-46 print i, data.dtype.descr[i + 42 + 4] data[data.dtype.descr[i + 42 + 4][0]] = x[i] #adding in the k-correct to z=0 files on 01/14/2015 y = np.column_stack(dset11['KCORRECT']) x = np.column_stack(dset11['ABSMAG']) for i in range(0, 5): #47-51 + 3 etc... 
print i, data.dtype.descr[i + 47 + 4][0] data[data.dtype.descr[i + 47 + 4][0]] = y[i] for i in range(0, 5): #52-56 print i, data.dtype.descr[i + 52 + 4] data[data.dtype.descr[i + 52 + 4][0]] = x[i] y = np.column_stack(dset12['KCORRECT']) x = np.column_stack(dset12['ABSMAG']) for i in range(0, 5): #57-61 print i, data.dtype.descr[i + 57 + 4] data[data.dtype.descr[i + 57 + 4][0]] = y[i] for i in range(0, 5): #62-66 print i, data.dtype.descr[i + 62 + 4] data[data.dtype.descr[i + 62 + 4][0]] = x[i] y = np.column_stack(dset13['KCORRECT']) x = np.column_stack(dset13['ABSMAG']) for i in range(0, 5): #67-71 print i, data.dtype.descr[i + 67 + 4] data[data.dtype.descr[i + 67 + 4][0]] = y[i] for i in range(0, 5): #72-76 print i, data.dtype.descr[i + 72 + 4] data[data.dtype.descr[i + 72 + 4][0]] = x[i] y = np.column_stack(dset14['KCORRECT']) x = np.column_stack(dset14['ABSMAG']) for i in range(0, 5): #77-81 print i, data.dtype.descr[i + 77 + 4] data[data.dtype.descr[i + 77 + 4][0]] = y[i] for i in range(0, 5): #82-86 print i, data.dtype.descr[i + 82 + 4] data[data.dtype.descr[i + 82 + 4][0]] = x[i] #save the resultant table filename = 'nyu_vagc_dr7' f = h5py.File(savepath + filename + '.hdf5', 'w') dset = f.create_dataset(filename, data=data) f.close()
def main(): ###################################################################################### savepath = cu.get_output_path() + 'processed_data/mpa_dr7/' ###################################################################################### #open catalogue catalogue = 'mpa_dr7' filepath = cu.get_output_path() + 'processed_data/mpa_dr7/' f = h5py.File(filepath + catalogue + '.hdf5', 'r') dset = f.get(catalogue) dset = np.array(dset) print dset.dtype.names print "number of entries in the catalogue:", len( np.unique(dset['SN_MEDIAN'])) filepath = cu.get_data_path() + 'mpa_DR7_catalogue/' f = open(filepath + "all_matches_dr7.dat", 'r') highest_sn_inds = [] #read in lines as a list of stings lines = [line for line in f] print "number of lines:", len(lines) #split lines into entries values = [line.split() for line in lines] #convert strings into integers and remove negative integers values = [[int(y) for y in x if int(y) > -1] for x in values] #put them in order for each line values = [np.sort(x) for x in values] #how may entries per object? N = [1.0 / len(x) for x in values] print "total number of unique objects:", np.sum( N) #doesn't agree with quoted value! #which value in each line gives the highest S/N? 
max_inds = [np.argmax(dset['SN_MEDIAN'][inds]) for inds in values] #get a list of the indices of the highest S/N objects per entry highest_sn_inds = [x[y] for x, y in zip(values, max_inds)] #remove duplicates unique_objects = np.unique(highest_sn_inds) #save indices as numpy array np.save(savepath + 'unique_objects', unique_objects) #save indices as ascii table from astropy.table import Table data = Table([unique_objects], names=['ind']) ascii.write(data, savepath + 'unique_objects.dat') ascii.write(data, './unique_objects.dat') #save a catalogue with only unique galaxies inds = np.arange(0, len(dset)) keep = np.in1d(inds, unique_objects) data = dset[keep] filename = 'mpa_dr7_unique' f1 = h5py.File(savepath + filename + '.hdf5', 'w') dset1 = f1.create_dataset(filename, data=data)
def main():
    """
    Convert Yang group-finder output (run on a mock) into an HDF5
    (gal_ID, group_ID) table.

    usage: python <script> i version
    where i selects the mock and `version` picks the input directory.
    """
    i = int(sys.argv[1])
    version = sys.argv[2]
    filepath = cu.get_data_path()+'Yang_groupcat/mock_runs/4th_run/version_'+version+'/'
    savepath = cu.get_output_path()+'processed_data/yang_groupcat/mock_runs/4th_run/version_'+version+'/'
    # output names and the three per-mock input files (index-aligned lists)
    catalogues = ['Mr19_age_distribution_matching_mock_radec_mock',\
                  'Mr19_age_distribution_matching_mock_sys_empty_shuffle_satrel_shuffle_radec_mock']
    filenames_1 = ['imock1_1','imock2_1']
    filenames_2 = ['imock1_2','imock2_2']
    filenames_3 = ['mock1_L_m','mock2_L_m']
    # column names for each headerless input file
    names_1 = ['IDgal','ID','GROUP_ID','cen']
    names_2 = ['GROUP_ID','ID','IDgal']
    names_3 = ['GROUP_ID','ra','dec','z','L19','MGROUP']
    catalogue = catalogues[i]
    filename = filenames_1[i]
    print 'open file 1/3...'
    data_1 = ascii.read(filepath+filename,names=names_1,delimiter='\s',data_start=0,format='no_header')
    filename = filenames_2[i]
    print 'open file 2/3...'
    data_2 = ascii.read(filepath+filename,names=names_2,delimiter='\s',data_start=0,format='no_header')
    filename = filenames_3[i]
    print 'open file 3/3...'
    data_3 = ascii.read(filepath+filename,names=names_3,delimiter='\s',data_start=0,format='no_header')
    print len(data_1), len(data_2), len(data_3)
    #determine index from data_3 into data_1
    # sorted binary search on GROUP_ID; rows of data_1 whose group is absent
    # from data_3 are masked out of `result`
    index = np.argsort(data_3['GROUP_ID'])
    sorted_3 = data_3['GROUP_ID'][index]
    sorted_index = np.searchsorted(sorted_3,data_1['GROUP_ID'])
    ind = np.take(index, sorted_index, mode="clip")
    mask = data_3['GROUP_ID'][ind] != data_1['GROUP_ID']
    result = np.ma.array(ind, mask=mask)
    # NOTE(review): the triple-quoted block below is disabled code kept as a
    # string literal (it is the only consumer of `result`); left untouched.
    """
    #combine different files into one data structure
    dtype=[('gal_ID_1','>i8'),('gal_ID_2','>i8'),('group_ID','>i8'),('brightest','>i8'),\
        ('ra_cen','>f8'),('dec_cen','>f8'),('z_cen','>f8'),('group_L19','>f8'),('halo_mass','>f8')]
    dtype=np.dtype(dtype)
    data = np.recarray((len(data_1),),dtype=dtype)
    #fill in data structure.
    data['gal_ID_1'] = data_1['IDgal']
    data['gal_ID_2'] = data_1['ID']
    data['group_ID'] = data_1['GROUP_ID']
    data['brightest'] = data_1['cen']
    data['ra_cen'] = data_3['ra'][result]
    data['dec_cen'] = data_3['dec'][result]
    data['z_cen'] = data_3['z'][result]
    data['group_L19'] = data_3['L19'][result]
    data['halo_mass'] = data_3['MGROUP'][result]
    #save
    print 'saving:', savepath+catalogue+'.hdf5'
    f = h5py.File(savepath+catalogue+'.hdf5', 'w')
    dset = f.create_dataset(catalogue, data=data)
    f.close()
    print 'done.'
    """
    #create output file with galaxy ID and group ID
    dtype=[('gal_ID','>i8'),('group_ID','>i8')]
    dtype=np.dtype(dtype)
    data = np.recarray((len(data_1),),dtype=dtype)
    #fill in data structure.
    data['gal_ID'] = data_1['ID']
    data['group_ID'] = data_1['GROUP_ID']
    #save
    print 'saving:', savepath+catalogue+'.hdf5'
    f = h5py.File(savepath+catalogue+'.hdf5', 'w')
    dset = f.create_dataset(catalogue, data=data)
    f.close()
    print 'done.'
def main():
    """
    Convert CFHTLens query result TSV files into per-field HDF5 files.
    Only the 'test', 'W2' and 'W4' conversions are currently active; the
    W1 (split/recombine) and W3 steps are disabled inside string literals.
    """
    filepath = cu.get_data_path() + 'CFHTLens/queries/'
    savepath = cu.get_output_path() + 'processed_data/CFHTLens/catalogues/'
    # list the available query files (informational only)
    filenames = glob.glob('/scratch/dac29/data/CFHTLens/queries/*.tsv')
    for filename in filenames:
        print filename
    # small test conversion
    field = 'test'
    print 'reading in:', field
    filename = 'test.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath + field + '.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    gc.collect()
    print 'done.'
    # NOTE(review): disabled code below kept verbatim as a string literal
    '''
    #let's split this field into two parts...
    field='W1'
    print 'reading in:',field, 'part 1'
    filename = 'CFHTLens_2013-09-03T20:10:04.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic, data_end=5114944)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'_1.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    field='W1'
    print 'reading in:',field, 'part 2'
    filename = 'CFHTLens_2013-09-03T20:10:04.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic, data_start=5114945)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'_2.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    #combine the wo parts into one big set!!!
    field = 'W1'
    f1 = h5py.File(savepath+field+'_1.hdf5', 'r')
    dset1 = f1.get(field)
    f2 = h5py.File(savepath+field+'_2.hdf5', 'r')
    dset2 = f2.get(field)
    f = h5py.File(savepath+field+'.hdf5', 'w')
    print dset1.dtype
    length = len(dset1)+len(dset2)
    dset = f.create_dataset(field, (length,), dtype=dset1.dtype)
    print 'len(dset1)=',len(dset1),'len(dset2)=', len(dset2),\
        'len(dset1)+len(dset2)=', len(dset1)+len(dset2), 'len(dset)=', len(dset)
    print len(dset[0:len(dset1)]), len(dset[len(dset1):len(dset1)+len(dset2)+1])
    print len(dset[0:len(dset1)]), len(dset[len(dset1):len(dset1)+len(dset2)])
    print 0,len(dset1),len(dset1)+1, len(dset1)+len(dset2)+1
    dset[0:len(dset1)] = dset1
    dset[len(dset1):len(dset1)+len(dset2)+1] = dset2
    f1.close()
    f2.close()
    f.close()
    dset=0
    dset1=0
    dset2=0
    gc.collect()
    '''
    # W2 field conversion
    field = 'W2'
    print 'reading in:', field
    filename = 'CFHTLens_2013-09-03T20:15:29.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath + field + '.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    # NOTE(review): W3 conversion also disabled as a string literal
    '''
    field='W3'
    print 'reading in:',field
    filename = 'CFHTLens_2013-09-03T20:16:44.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath+field+'.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
    '''
    # W4 field conversion
    field = 'W4'
    print 'reading in:', field
    filename = 'CFHTLens_2013-09-03T20:18:01.tsv'
    data = ascii.read(filepath+filename, delimiter='\t', \
                      guess=True, Reader=ascii.Basic)
    print 'saving as hdf5 file...'
    f = h5py.File(savepath + field + '.hdf5', 'w')
    dset = f.create_dataset(field, data=data)
    f.close()
    data = 0
    gc.collect()
    print 'done.'
def main():
    """
    Convert the three MPA-JHU DR7 FITS catalogues to HDF5, then build
    pairwise merged tables and the combined 'mpa_dr7' master catalogue.
    Merging is positional (row i of every file is assumed to be the same
    object) -- TODO confirm row alignment.
    """
    filepath = cu.get_data_path() + 'mpa_DR7_catalogue/'
    savepath = cu.get_output_path() + 'processed_data/mpa_dr7/'
    #################################################################
    catalogues = [
        'gal_totspecsfr_dr7_v5_2.fits', 'gal_info_dr7_v5_2.fits',
        'totlgm_dr7_v5_2.fits'
    ]
    # SFR catalogue -> HDF5 ([:-5] strips the '.fits' extension)
    filename = catalogues[0]
    hdulist1 = fits.open(filepath + filename, memmap=True)
    data1 = hdulist1[1].data
    print 'saving as:', savepath + filename[:-5] + '.hdf5'
    f1 = h5py.File(savepath + filename[:-5] + '.hdf5', 'w')
    dset1 = f1.create_dataset(filename[:-5], data=data1)
    # galaxy info catalogue -> HDF5
    filename = catalogues[1]
    hdulist2 = fits.open(filepath + filename, memmap=True)
    data2 = hdulist2[1].data
    print 'saving as:', savepath + filename[:-5] + '.hdf5'
    f2 = h5py.File(savepath + filename[:-5] + '.hdf5', 'w')
    dset2 = f2.create_dataset(filename[:-5], data=data2)
    # stellar mass catalogue -> HDF5
    filename = catalogues[2]
    hdulist3 = fits.open(filepath + filename, memmap=True)
    data3 = hdulist3[1].data
    print 'saving as:', savepath + filename[:-5] + '.hdf5'
    f3 = h5py.File(savepath + filename[:-5] + '.hdf5', 'w')
    dset3 = f3.create_dataset(filename[:-5], data=data3)
    # merge info + SFR column-wise into one record array
    dtype1 = dset1.dtype.descr
    dtype2 = dset2.dtype.descr
    dtype12 = dtype2 + dtype1
    dtype12 = np.dtype(dtype12)
    print dtype12
    print len(dset1), len(dset2)
    data12 = np.recarray((len(dset2), ), dtype=dtype12)
    for name in dset2.dtype.descr:
        name = name[0]
        print name
        data12[name] = dset2[name]
    for name in dset1.dtype.descr:
        name = name[0]
        print name
        data12[name] = dset1[name]
    filename = 'gal_info_gal_totspecsfr_dr7_v5_2'
    print savepath + filename + '.hdf5'
    f12 = h5py.File(savepath + filename + '.hdf5', 'w')
    dset12 = f12.create_dataset(filename, data=data12)
    # merge info + stellar mass the same way
    dtype3 = dset3.dtype.descr
    dtype2 = dset2.dtype.descr
    print dtype3
    print dtype2
    dtype32 = dtype2 + dtype3
    dtype32 = np.dtype(dtype32)
    print dtype32
    print len(dset3), len(dset2)
    data32 = np.recarray((len(dset2), ), dtype=dtype32)
    for name in dset2.dtype.descr:
        name = name[0]
        print name
        data32[name] = dset2[name]
    for name in dset3.dtype.descr:
        name = name[0]
        print name
        data32[name] = dset3[name]
    filename = 'gal_info_totlgm_dr7_v5_2'
    print savepath + filename + '.hdf5'
    f32 = h5py.File(savepath + filename + '.hdf5', 'w')
    dset32 = f32.create_dataset(filename, data=data32)
    print "making combined master catalogue"
    #alter column names for these two
    # prefix SFR columns with 'sfr_' and stellar-mass columns with 'sm_'
    # so names cannot collide in the master table
    dtype1_c = []
    dtype3_c = []
    for i in range(len(dtype1)):
        dtype1_c.append(('sfr_' + dtype1[i][0], dtype1[i][1]))
    for i in range(len(dtype3)):
        dtype3_c.append(('sm_' + dtype3[i][0], dtype3[i][1]))
    # NOTE: dtype1/dtype3 are rebound from descr lists to np.dtype objects;
    # the enumerate loops below rely on this exact ordering
    dtype1 = np.dtype(dtype1_c)
    dtype3 = np.dtype(dtype3_c)
    dtype123 = dtype2 + dtype3_c + dtype1_c
    dtype123 = np.dtype(dtype123)
    print dtype123
    print len(dset3), len(dset2), len(dset1)
    data123 = np.recarray((len(dset2), ), dtype=dtype123)
    for name in dset2.dtype.descr:
        name = name[0]
        print name
        data123[name] = dset2[name]
    # copy SFR columns: renamed 'sfr_*' destination <- original column, positionally
    original_descr = dset1.dtype.descr
    for i, name in enumerate(dtype1.descr):
        name = name[0]
        original_name = original_descr[i][0]
        print name, original_name
        data123[name] = dset1[original_name]
    # copy stellar-mass columns ('sm_*') the same way
    original_descr = dset3.dtype.descr
    for i, name in enumerate(dtype3.descr):
        name = name[0]
        original_name = original_descr[i][0]
        print name, original_name
        data123[name] = dset3[original_name]
    filename = 'mpa_dr7'
    print savepath + filename + '.hdf5'
    f123 = h5py.File(savepath + filename + '.hdf5', 'w')
    dset123 = f123.create_dataset(filename, data=data123)
    # NOTE(review): HDF5 handles and FITS hdulists are never closed here;
    # output relies on interpreter exit to flush.