# Print the g1/g2 numbers and values together with the selection settings,
# then synchronise all ranks before the cutoff loop starts.
print("%7d, %8.4f, %8.4f, %7d, %8.4f, %8.4f"%(g1_num+1, -g1_e, g1_e, g2_num+1, -g2_e, g2_e))
print(select_cri, select_scale)
comm.Barrier()

# start cutoff
mcs = [[], []]
# fg1 & fg2
for i in range(2):
    cat_data_path = data_path + "cata_%s_g%d.hdf5"%(result_source, i+1)

    # tasks distribution
    # if cpus <= total tasks, each core will get at least one task,
    # labeled by a number (0 ~ total_task)
    # if cpus > total tasks, some cores will get one task,
    # and the others will get nothing, an empty task list '[]'.
    tasks = tool_box.allot(list(range(gnums[i])), cpus)[rank]
    if len(tasks) != 0:
        for ig in tasks:
            # read the data
            set_name = "/fg%d_%d" % (i + 1, ig)
            # Dataset.value was removed in h5py 3.0; indexing with [()]
            # reads the whole dataset.  The context manager closes the file
            # even when the read raises.
            with h5py.File(cat_data_path, "r") as h5f:
                data = h5f[set_name][()]

            # keep the rows whose column 0 reaches nstar_thres and whose
            # column 1 reaches area_thres
            nstar = data[:, 0]
            idx_s = nstar >= nstar_thres
            area = data[:, 1]
            idx_a = area >= area_thres
            selected_data = data[idx_s & idx_a]
# PSF
psf = galsim.Moffat(beta=3.5, fwhm=0.7, flux=1.0, trunc=2)
if rank == 0:
    # only the root rank draws the PSF stamp and writes it to disk
    psf_img = galsim.ImageD(stamp_size, stamp_size)
    psf.drawImage(image=psf_img, scale=pixel_scale)
    hdu = fits.PrimaryHDU(psf_img.array)
    psf_path = total_path + 'psf.fits'
    hdu.writeto(psf_path, overwrite=True)
    # NOTE(review): the original indentation is lost; this summary line is
    # assumed to belong to the rank-0 branch -- confirm.
    logger.info(
        "desti: %s, size: %d, pixel_scale: %.3f, noise_sig: %.2f, total galaxy number: %d"
        % (source, stamp_size, pixel_scale, noise_sig, total_chips_num))
logger.info("seed: %d" % seed)

# task allocation
chip_tags = list(range(total_chips_num))
chip_tags_rank = tool_box.allot(chip_tags, cpus)[rank]

snr_data_path = para_path + "snr.npz"
snr_data = numpy.load(snr_data_path)["arr_0"]

# The shear catalogue does not depend on shear_id, so it is opened once
# instead of once per loop iteration.
shear_cata = para_path + "shear.npz"
shear = numpy.load(shear_cata)

for shear_id in range(shear_num):
    g1 = shear["arr_0"][shear_id]
    g2 = shear["arr_1"][shear_id]

    paras = para_path + "para_%d.hdf5" % shear_id
    f = h5py.File(paras, 'r')
    # Dataset.value was removed in h5py 3.0; [()] reads the full dataset.
    e1s = f["/e1"][()]
    e2s = f["/e2"][()]
    radius = f["/radius"][()]
# This fragment begins inside an expression opened above the visible source
# (the closing `])` ends it), so the code line is kept verbatim.
# Visible steps, in order:
#   * ra_bin is built as ra_min + i * block_scale for i in 0..grid_cols.
#   * On rank 0 a 2-D histogram of (dec, ra) over [dec_bin, ra_bin] is saved
#     as an .npz file and rendered with imshow into a .png file.
#     NOTE(review): both file names use `rank + 1`, which is always 1 inside
#     the `rank == 0` branch -- confirm this is the intended label.
#   * shear_array is allocated with shape (grid_rows, grid_cols * 4).
#   * Every (row, col) grid cell is listed, tool_box.allot() picks this
#     rank's share, and for each cell a boolean mask `idxs` selects entries
#     with dec in [dec_1, dec_2) and ra in [ra_1, ra_2).
[dec_min + i * block_scale for i in range(grid_rows + 1)]) ra_bin = numpy.array([ra_min + i * block_scale for i in range(grid_cols + 1)]) if rank == 0: fig, ax = plt.subplots(figsize=(10, 10)) nums = numpy.histogram2d(dec, ra, [dec_bin, ra_bin])[0] numpy.savez(pic_path + "w_%d_num.npz" % (rank + 1), nums) im = ax.imshow(nums) fig.colorbar(im) plt.savefig(pic_path + "w_%d_num.png" % (rank + 1), bbox_inches='tight') plt.close() # [g1, g2] shear_array = numpy.zeros((grid_rows, grid_cols * 4)) grid_ids = [(i, j) for i in range(grid_rows) for j in range(grid_cols)] my_grid = tool_box.allot(grid_ids, cpus)[rank] for grid in my_grid: row, col = grid dec_1, dec_2 = dec_min + row * block_scale, dec_min + (row + 1) * block_scale ra_1, ra_2 = ra_min + col * block_scale, ra_min + (col + 1) * block_scale idx_r1 = dec >= dec_1 idx_r2 = dec < dec_2 idx_c1 = ra >= ra_1 idx_c2 = ra < ra_2 idxs = idx_r1 & idx_r2 & idx_c1 & idx_c2
# The root rank creates (or truncates) the output catalogue file.
if rank == 0:
    with h5py.File(total_cata_path + "cfht_cata/cata.hdf5", "w"):
        pass

gal_count = 0
num_in_field = []
for area_id in range(1, 1 + area_num):
    # collect the .dat paths of every field whose name contains "w<area_id>"
    field_paths = []
    field_count = 0
    for field in fields:
        if "w%d" % area_id in field:
            field_path = total_cata_path + "cfht_cata/%s.dat" % field
            field_paths.append(field_path)
            field_count += 1

    # this rank's share of the files
    sub_fields = tool_box.allot(field_paths, cpus)[rank]

    fcount = 0
    data = None
    for field_name in sub_fields:
        t11 = time.time()
        try:
            temp = numpy.loadtxt(field_name)
            if fcount == 0:
                data = temp.copy()
            else:
                # numpy.row_stack is a deprecated alias of numpy.vstack
                data = numpy.vstack((data, temp))
            fcount += 1
        except (OSError, ValueError):
            # OSError: file missing/unreadable; ValueError: unparsable text
            # or a shape mismatch while stacking.  A bare `except:` would
            # also swallow KeyboardInterrupt and genuine programming errors.
            print(rank, " can't find ", field_name)
        t12 = time.time()
h = 0.7
C_0_hat = 2.99792458
H_0 = 100 * h
coeff = 1000 * C_0_hat

# read foreground
# Dataset.value was removed in h5py 3.0; [()] reads the whole dataset.
with h5py.File("./data/%s_sub.hdf5" % area_id, "r") as h5f:
    RA_f = h5f["/RA"][()]
    DEC_f = h5f["/DEC"][()]
    Z_f = h5f["/Z"][()]
    DIST_f = h5f["/DISTANCE"][()][:, 0]

fore_num = Z_f.shape[0]
tasks = list(range(fore_num))
my_task = tool_box.allot(tasks, cpus)[rank]
# min()/max() raise ValueError on an empty sequence, so only report the
# range when this rank actually received tasks.
if len(my_task) > 0:
    print("Rank: %d. My: %d ~ %d" % (rank, min(my_task), max(my_task)))
else:
    print("Rank: %d. My: no task" % rank)

# read background
# area_id = "/w_1"
background_path = "/mnt/perc/hklee/CFHT/gg_lensing/data/cfht_cata_grid.hdf5"
# this handle is still open at the end of the visible fragment
h5f = h5py.File(background_path, "r")
Z = h5f["%s/Z" % area_id][()][:, 0]
back_num = Z.shape[0]
back_label = numpy.arange(back_num)

# column 0: running label, column 1: RA
BACK_DATA = numpy.zeros((Z.shape[0], 8))
BACK_DATA[:, 0] = back_label
BACK_DATA[:, 1] = h5f["%s/RA" % area_id][()][:, 0]
# dz = 10./(num-1)
# delta_dist = numpy.zeros((num,))
#
# for i in range(num):
#     z = dz*i
#     com_dist = cosmos.comoving_distance(z).value*h
# t2 = time.time()
# print(t2-t1)
# exit()

# Dataset.value was removed in h5py 3.0; [()] loads the dataset into memory
# first, so the unsorted index list ch_pts is applied by numpy rather than
# by h5py.
with h5py.File(data_path, "r") as h5f:
    redshift = h5f["%sZ" % set_name][()]
    total_num = redshift.shape[0]

    # draw ch_num distinct indices from this rank's share of the entries
    my_list = numpy.array(
        tool_box.allot(list(range(total_num)), cpus)[rank])
    ch_pts = numpy.random.choice(my_list, ch_num, replace=False).tolist()
    # ch_pts = [i for i in range(total_num)]

    com_dist_file = h5f["%sDISTANCE" % set_name][()][ch_pts]
    com_dist_integ_file = h5f["%sDISTANCE_INTEG" % set_name][()][ch_pts]
redshift = redshift[ch_pts]

delta_dist = numpy.zeros((2, ch_num))
astro_dist = numpy.zeros((2, ch_num))
# NOTE(review): the loop starts at 1, so entry 0 is never evaluated --
# confirm that this is intended.
for i in range(1, ch_num):
    # `.value` here is read from the object returned by
    # cosmos.comoving_distance, not from an h5py dataset, so it is kept.
    com_dist = cosmos.comoving_distance(redshift[i]).value * h
# Builds this rank's `field_pool`.  The code line is kept verbatim because the
# last `else:` appears to pair with an `if` that starts above the visible
# source, so the original nesting cannot be rebuilt reliably.
# Visible steps:
#   * If `filter_exist`: every line of `filter_path` containing "w" (text up
#     to the first newline) is appended to `total_field_pool`.
#   * Otherwise `total_field_pool` falls back to `fields` (rank 0 prints a
#     notice).
#   * tool_box.allot(...)[rank] then selects this rank's fields.
#   * For each field and each exposure key of cfht_dict[field], the exposure
#     directory and the "<expo>_sex.npz" path are formed before the
#     `jobs == "stack"` branch, whose body is outside the view.
total_field_pool = [] if filter_exist: if rank == 0: print("Stack the sex snr") with open(filter_path, "r") as f: contents = f.readlines() for area in contents: area = area.split("\n")[0] if "w" in area: total_field_pool.append(area) else: if rank == 0: print("Filtered list does not exist!") total_field_pool = fields field_pool = tool_box.allot(total_field_pool, cpus)[rank] else: field_pool = tool_box.allot(fields, cpus)[rank] absnr1 = [] absnr2 = [] empty_chip = [] snr_expos = [] for field in field_pool: expos = list(cfht_dict[field].keys()) for expo in expos: chip_num = 0 expo_path = field_path + "%s/%s/" % (field, expo) sex_npz_path = field_path + "%s/%s_sex.npz" % (field, expo) if jobs == "stack":
import tool_box
from subprocess import Popen
from mpi4py import MPI
import time

# Copy the result directories of every field assigned to this rank to the
# remote catalogue store with scp.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
cpus = comm.Get_size()

ts = time.time()

data_path = "/mw/w1234/original/"
nm_path = data_path + "nname.dat"

fields = tool_box.field_dict(nm_path)[1]
missions = tool_box.allot(fields, cpus)[rank]

store_path = "/mnt/ddnfs/data_users/hkli/CFHT/catalog/"

for dirs in ["result", "result_int", "result_ext"]:
    for field in missions:
        src = "/mw/w1234/original/%s/%s/" % (field, dirs)
        dst = "[email protected]:/mnt/ddnfs/data_users/hkli/CFHT/catalog/%s/" % field
        # print("scp -r", src, dst)
        try:
            # An argument list without a shell avoids any shell
            # interpretation of the field names.
            status = Popen(["scp", "-r", src, dst]).wait()
        except OSError:
            # scp itself could not be started
            status = None
        # The exit status used to be ignored, so a failed copy was never
        # reported; any non-zero status now counts as a failure.
        if status != 0:
            print("FAILED %s" % field)
# Prepares the SExtractor run for the chosen `sex_filter`.  The code line is
# kept verbatim: it ends inside an unfinished `try:` and the original nesting
# under `if rank == 0:` cannot be recovered with confidence.
# Visible steps:
#   * `filter_names` and `gauss_filters` are parallel lists; an unknown
#     `sex_filter` prints a message and raises KeyError.
#   * Image and catalogue paths are generated for every (shear, chip) pair and
#     split between ranks with tool_box.allot().
#   * On rank 0 the template "./default.sex_ori" is read, entries 16, 18 and
#     23 of the line list are overwritten (thresholds set to
#     float(second "_"-separated token of sex_filter) * noise_sig, and the
#     convolution filter name), and the result is written to "./default.sex".
#   * Finally the catalogue directory is created with os.makedirs inside a
#     `try:` whose handler lies outside the view.
filter_names = ["sex2_4", "sex4_4", "sex2_2", "sex4_2", "sex2_1.5", "sex4_1.5"] gauss_filters = ["gauss_2.0_5x5", "gauss_4.0_7x7", "gauss_2.0_5x5", "gauss_4.0_7x7", "gauss_2.0_5x5", "gauss_4.0_7x7"] if sex_filter not in filter_names: print("%s not found in "%sex_filter, filter_names) raise KeyError total_fits_path = [total_path + "/%d/gal_chip_%04d.fits" % (i, j) for i in range(shear_num) for j in range(chip_num)] total_cat_paths = [total_path + "/result/sex_cat/%s/cat/%d_gal_chip_%04d.fits.cat" % (sex_filter, i, j) for i in range(shear_num) for j in range(chip_num)] allot_fits_path = tool_box.allot(total_fits_path, cpus)[rank] allot_cat_path = tool_box.allot(total_cat_paths, cpus)[rank] if rank == 0: with open("./default.sex_ori", "r") as f: contents = f.readlines() sig_level = float(sex_filter.split("_")[1])*noise_sig contents[16] = "DETECT_THRESH %.2f # <sigmas> or <threshold>,<ZP> in mag.arcsec-2\n"%sig_level contents[18] = "ANALYSIS_THRESH %.2f # <sigmas> or <threshold>,<ZP> in mag.arcsec-2\n"%sig_level contents[23] = "FILTER_NAME %s.conv # name of the file containing the filter\n"%gauss_filters[filter_names.index(sex_filter)] with open("./default.sex", "w") as f: f.writelines(contents) cat_path = total_path + "/result/sex_cat/%s/cat/"%sex_filter try: os.makedirs(cat_path)
# log_path = "/home/hkli/work/test/log/log_%d.dat"%rank
cata_path = "/mnt/perc/hklee/CFHT/catalog/cfht_cata/"
log_path = "/home/hklee/work/test/log/log_%d.dat" % rank
logger = tool_box.get_logger(log_path)

t1 = time.time()

# Only the root rank scans the directory and splits the catalogue names into
# sub-lists; the other ranks contribute None to the scatter below.
name_src_list = None
if rank == 0:
    names_src = os.listdir(cata_path + "field_dat/")
    # keep "w*.dat" entries that are neither "_new" nor "-pz" files and
    # strip everything after the first dot
    cata_name_src = [
        nms.split(".")[0]
        for nms in names_src
        if "w" in nms and ".dat" in nms
        and not ("_new" in nms or "-pz" in nms)
    ]
    name_src_list = tool_box.allot(cata_name_src, cpus)
    print(
        rank, "Before, I got %d files in %d sub-lists" %
        (len(cata_name_src), len(name_src_list)))

# each rank receives one sub-list from the root
sub_src_list = comm.scatter(name_src_list, root=0)
print(rank, "I got %d files" % len(sub_src_list))

# data is the catalog that contains additional parameters, "###.tsv"
# read all data from each area
for i in range(1, 5):
    h5f = h5py.File(cata_path + "CFHT_W%d.hdf5" % i, "r")
# Sets up three MPI shared-memory windows and fills them per shear point.
# The code line is kept verbatim.
# NOTE(review): the two leading zero assignments look like the non-root
# branch of a size computation that starts above the view (compare the
# `if rank == 0 ... else: nbytes = 0` pattern used with Allocate_shared in
# similar scripts) -- confirm before re-indenting.
# Visible steps:
#   * win1/win2/win3 are allocated with Allocate_shared and
#     Shared_query(0) is used to obtain the buffers that back the numpy
#     arrays result_1, result_2 (len(ch_num) x (shear_num*2 + 2)) and
#     total_data_buf (shear_num*gal_each_shear x 5).
#   * Each rank takes its share of the shear indices, loads "/data" from
#     "shear_<ig>.hdf5", copies it into its slice of total_data_buf and runs
#     fq.fmin_g_new on the first ch_num[i] entries.
# NOTE(review): `Dataset.value` was removed in h5py 3.0; `h5f["/data"][()]`
# is the replacement.
nbytes_data = 0 nbytes = 0 win1 = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm) win2 = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm) win3 = MPI.Win.Allocate_shared(nbytes_data, itemsize, comm=comm) buf1, itemsize = win1.Shared_query(0) buf2, itemsize = win2.Shared_query(0) buf3, itemsize = win3.Shared_query(0) result_1 = numpy.ndarray(buffer=buf1, dtype='d', shape=(len(ch_num), shear_num*2 + 2)) # g1 result_2 = numpy.ndarray(buffer=buf2, dtype='d', shape=(len(ch_num), shear_num*2 + 2)) # g2 total_data_buf = numpy.ndarray(buffer=buf3, dtype='d', shape=(shear_num*gal_each_shear, 5)) # g2 my_task = tool_box.allot([i for i in range(shear_num)], cpus)[rank] print("Rank: %d. "%rank, my_task) fq = Fourier_Quad(12,11) for ig in my_task: data_path = parent_path + "shear_%d.hdf5"%ig h5f = h5py.File(data_path,"r") data = h5f["/data"].value h5f.close() mg1 = data[:,0] mg2 = data[:,1] mnu1 = data[:,2] + data[:,3] mnu2 = data[:,2] - data[:,3] total_data_buf[ig*gal_each_shear:(ig+1)*gal_each_shear] = data for i in range(len(ch_num)): gh1, gh1_sig = fq.fmin_g_new(g=mg1[:ch_num[i]], nu=mnu1[:ch_num[i]], bin_num=8, scale=100)[:2]
mnu1 = mnu1[idx]
mnu2 = mnu2[idx]

itemsize = MPI.DOUBLE.Get_size()
if rank == 0:
    # bytes for the (4, fg2_bin_num) array of doubles
    nbytes = fg2_bin_num * 4 * itemsize
else:
    nbytes = 0
win1 = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
buf1, itemsize = win1.Shared_query(0)
# rows 0/1: g1 and its sigma, rows 2/3: used for g2 below
# NOTE(review): rows 0/1 are indexed by fg1 bins but the array only has
# fg2_bin_num columns -- confirm fg1_bin_num <= fg2_bin_num.
result = numpy.ndarray(buffer=buf1, dtype='d', shape=(4, fg2_bin_num))
comm.Barrier()

# g1: each rank fits only the fg1 bins allotted to it
my_g1 = tool_box.allot(list(range(fg1_bin_num)), cpus)[rank]
for i in my_g1:
    idx_1 = fg1 >= fg1_bin[i]
    idx_2 = fg1 < fg1_bin[i + 1]
    idx_t = idx_1 & idx_2
    g1, g1_sig = fq.fmin_g_new(mg1[idx_t], mnu1[idx_t], 8, fit_num=20)[:2]
    result[0, i] = g1
    result[1, i] = g1_sig

# g2: iterate over this rank's share.  The loop used to run over
# range(fg2_bin_num), so every rank repeated every fit and `my_g2` was
# never used.
my_g2 = tool_box.allot(list(range(fg2_bin_num)), cpus)[rank]
for i in my_g2:
    idx_1 = fg2 >= fg2_bin[i]
    idx_2 = fg2 < fg2_bin[i + 1]
    idx_t = idx_1 & idx_2
    g2, g2_sig = fq.fmin_g_new(mg2[idx_t], mnu2[idx_t], 8, fit_num=20)[:2]
    result[2, i] = g2
# Dataset.value was removed in h5py 3.0; [()] reads the whole dataset.
block_boundx = grid_h5f["/background/w_%d/block_boundx" % area_id][()]
dec_bin = grid_h5f["/background/w_%d/DEC_bin" % area_id][()]
ra_bin = grid_h5f["/background/w_%d/RA_bin" % area_id][()]

# grid layout is stored as an attribute of the area group: (ny, nx)
grid_shape = grid_h5f["/background/w_%d" % area_id].attrs["grid_shape"]
grid_ny = grid_shape[0]
grid_nx = grid_shape[1]
grid_num = grid_ny * grid_nx

tasks = list(range(grid_num))
if rank == 0:
    print("Area: %d, grid: %d (%d, %d)" % (area_id, grid_num, grid_ny, grid_nx))
    # NOTE(review): the original indentation is lost; this log call is
    # assumed to sit next to the print inside the rank-0 branch -- confirm.
    logger.info("Area: %d, grid: %d (%d, %d)" % (area_id, grid_num, grid_ny, grid_nx))

my_grids = tool_box.allot(tasks, cpus)[rank]

# check col by col
for i in range(10):
    # the original data and the grid data
    pre_datas = pre_h5f[pre_set_names[i]][()]
    grid_datas = grid_h5f[grid_set_names[i]][()]
    logger.info("Area: %d, %s" % (area_id, data_name[i]))

    for ig in my_grids:
        # flat grid index -> (row, col)
        row, col = divmod(ig, grid_nx)
        # rows of the gridded catalogue that belong to block ig
        block_s_id = block_start[ig, 0]
        block_e_id = block_end[ig, 0]
        grod_data_i = grid_datas[block_s_id:block_e_id]
warnings.filterwarnings("error") comm = MPI.COMM_WORLD rank = comm.Get_rank() cpus = comm.Get_size() with open("/home/hkli/work/envs/envs.dat", "r") as f: contents = f.readlines() for path in contents: if "cfht_data_path" in path: data_path = path.split("=")[1] elif "cfht_field_path" in path: field_path = path.split("=")[1] cfht_dict, fields = tool_box.field_dict(data_path + "nname.dat") field_pool = tool_box.allot(fields, cpus) for field in field_pool[rank]: expos = list(cfht_dict[field].keys()) f_path = field_path + field + "/" for expo in expos: i = 0 for chip in cfht_dict[field][expo]: dat_path = data_path + "%s/result/%s_shear.dat" % (field, chip) try: temp = numpy.loadtxt(dat_path, skiprows=1) if i == 0: data = temp else: data = numpy.row_stack((data, temp)) i += 1
pre_fields = tool_box.field_dict(cfht_cata_path + "nname.dat")[1]

# loop the areas,
# the catalog of each area will be stored in "w_i"
num = []
for area_id in range(1, area_num + 1):
    # distribute the files: every field whose name contains "w<area_id>"
    area_tag = "w%d" % area_id
    field_tar = [field for field in pre_fields if area_tag in field]

    # if there are fewer fields than ranks,
    # some ranks will get an empty "field_pool"
    field_pool = tool_box.allot(field_tar, cpus)[rank]

    # check: none of this rank's fields may carry the tag of another area
    anothers = ["w%d" % i for i in range(1, area_num + 1) if i != area_id]
    for field_name in field_pool:
        for ext_field in anothers:
            if ext_field not in field_name:
                continue
            print("WRONG", rank, area_id, field_name)
            exit()

    field_count = 0
    for field_name in field_pool:
        # c_data_path = cfht_cata_path + "field_dat/%s_new.dat"%field_name
        c_data_path = cfht_cata_path + "field_dat/%s.hdf5" % field_name
        pz_data_path = cfht_cata_path + "field_dat/%s-pz.dat" % field_name
from mpi4py import MPI
from sys import path, argv
path.append("/home/hklee/work/mylib")
path.append("/home/hkli/work/mylib")
import tool_box

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
numprocs = comm.Get_size()

# split the shear points between the ranks
shear_num = 11
n, m = divmod(shear_num, numprocs)
tasks = list(range(shear_num))
my_task = tool_box.allot(tasks, numprocs)[rank]
print(rank, my_task)

# four consecutive row ranges of sub_num rows each, one per
# "data_rotation_<i>" directory read below
sub_num = 300000
total_data = numpy.zeros((sub_num * 4, 4))
entry = [(k * sub_num, (k + 1) * sub_num) for k in range(4)]

parent_path = argv[1]
for ig in my_task:
    for i in range(len(entry)):
        data_path = parent_path + "/data_rotation_%d/data_%d_noisy.hdf5" % (i, ig)
        h5f = h5py.File(data_path, "r")
# Two (ny, nx) views on the buffers buf1 / buf2.
# (original note: "array filled with zero" -- presumably relies on how the
# buffers were allocated; verify.)
grid_shape = (ny, nx)
chisq = numpy.ndarray(buffer=buf1, dtype='d', shape=grid_shape)
chisq_new = numpy.ndarray(buffer=buf2, dtype='d', shape=grid_shape)
comm.Barrier()

mg_bins = fq.set_bin(mg1, bin_num, 10)
mg_bins_test = fq.set_bin(mg1, bin_num_test, 10)

# num_exp is the mean of the first bin_num2 counts (reordered by `inverse`)
# and the remaining counts
num_ini = numpy.histogram(mg1, mg_bins)[0]
n1 = num_ini[0:bin_num2][inverse]
n2 = num_ini[bin_num2:]
num_exp = (n1 + n2) / 2

# every grid point is one task; each rank evaluates its own share
task_list = list(range(nx*ny))
sub_task = tool_box.allot(task_list, cpus)[rank]
for i in sub_task:
    # flat index -> (row m, column n)
    m, n = divmod(i, nx)
    # y(a2)-axis, x(a1)-axis
    gh = a1_range[n] + a2_range[m]*x
    chisq[m, n] = fq.get_chisq(mg1, mnu1, gh, mg_bins, bin_num2, inverse, 0)
    chisq_new[m, n] = fq.get_chisq_new(mg1, mnu1, gh, mg_bins, bin_num2, inverse, 0, num_exp)
comm.Barrier()

# the root rank stores both grids and starts the figure
if rank == 0:
    numpy.savez(cache_nm, chisq, chisq_new)
    img = Image_Plot(fig_x=12, fig_y=9)
    img.subplots(1, 1)
# collect: collect the data from the files of each field. It creates the
#          "fourier_cata.hdf5" in the parent directory of the one that
#          contains the field catalog.
#          If the catalog file doesn't exist, run it first!
#          It will add the redshift parameters from the CFHT catalog into
#          the final catalog.

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
cpus = comm.Get_size()

data_path = "/mnt/perc/hklee/CFHT/catalog/fourier_cata_new/"
raw_cata_path = data_path + "raw_cata_new/"

dicts, fields = tool_box.field_dict(data_path + "nname.dat")
my_field = tool_box.allot(fields, cpus)[rank]

chip_num = 36

for field_nm in my_field:
    field_path = raw_cata_path + "%s/"%field_nm
    files = os.listdir(field_path)

    # unique name prefixes (text before the first "p") of the ".dat" files,
    # in sorted order
    chip_exps = []
    for nm in files:
        if ".dat" not in nm:
            continue
        exp_nm = nm.split("p")[0]
        if exp_nm in chip_exps:
            continue
        chip_exps.append(exp_nm)
    chip_exps.sort()
import matplotlib.pyplot as plt

# the first command-line argument selects the job
cmd = argv[1]

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
cpus = comm.Get_size()
# cpus = 1
# rank = 0

# one log file per rank
log_path = "./log_%d.dat" % rank
logger = tool_box.get_logger(log_path)

nm_path = "/mw/w1234/original/nname.dat"
all_expos, all_fields = tool_box.field_dict(nm_path)
fields = tool_box.allot(all_fields, cpus)[rank]

chip_data_path = "/mw/w1234/original/"
result_path = "/lmc/cfht/para_fit/"

# flattened row/column coordinates of a 4644 x 2112 grid and a running tag
# per grid point
my, mx = numpy.mgrid[0:4644, 0:2112]
myf = my.flatten()
mxf = mx.flatten()
tags = numpy.arange(len(myf))

if cmd == "files":
    if rank == 0:
        for field_name in all_fields:
            field_dir = '%s/' % field_name
            pic_field_path = result_path + "pic/" + field_dir
            if not os.path.exists(pic_field_path):
                os.mkdir(pic_field_path)
            data_field_path = result_path + "data/" + field_dir
# entries of the "para" section read from para.ini, in this order
para_keys = ("total_num", "stamp_size", "stamp_col",
             "shear_num", "noise_sig", "pixel_scale")
para_contents = [["para", key, 1] for key in para_keys]
para_items = tool_box.config(para_path + "para.ini",
                             ['get'] * len(para_contents), para_contents)

total_chip_num = int(para_items[0])
stamp_size = int(para_items[1])
columns = int(para_items[2])
shear_num = int(para_items[3])

stamp_num = 10000
psf_path = total_path + "psf.fits"

# this rank's share of the chips
chip_labels = tool_box.allot(list(range(total_chip_num)), cpus)[rank]
chip_num = len(chip_labels)

fq = Fourier_Quad(stamp_size, 123)
psf_img = galsim.Image(fits.open(psf_path)[0].data)

# A rank that received no chips must not crash on chip_labels[0]; -1 marks
# "no chip" in the log line.
if chip_num > 0:
    first_chip, last_chip = chip_labels[0], chip_labels[-1]
else:
    first_chip, last_chip = -1, -1
log_informs = "RANK: %d, SOURCE: %s, TOTAL CHIPS: %d, MY CHIPS: %d (%d ~ %d)" % (
    rank, source, total_chip_num, chip_num, first_chip, last_chip)
logger.info(log_informs)

ts = time.time()
for i in range(shear_num):
    # one row per stamp of every chip handled by this rank
    R_factor = numpy.zeros((chip_num * stamp_num, 1))
    for tag, j in enumerate(chip_labels):
        log_informs = "%02d/gal_chip_%04d.fits start" % (i, j)
        logger.info(log_informs)

        t1 = time.time()
        chip_path = total_path + "%d/gal_chip_%04d.fits" % (i, j)
H_0 = 100 * h
coeff = 1000 * C_0_hat

# total point number
total_z = 2000001
dz = 5
z_min = 0
z_max = z_min + (total_z - 1) * dz / 1000000.

# redshift array: z_min + i * dz / 1e6 for i = 0 .. total_z - 1.
# Vectorized; this replaces a Python loop over two million elements and
# produces the same float64 values.
redshift = z_min + numpy.arange(total_z) * dz / 1000000.

# task distribution
total_task = numpy.arange(0, total_z)
my_task = tool_box.allot(total_task, cpus)[rank]

# shared buffer: only rank 0 requests memory (total_z x 3 doubles)
itemsize = MPI.DOUBLE.Get_size()
if rank == 0:
    nbytes = total_z * itemsize * 3
else:
    nbytes = 0
win1 = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
buf1, itemsize = win1.Shared_query(0)

# comoving distance (Mpc/h), integrate part of co-distance
# angular diameter distance (Mpc/h) & the integrate part
data_buf = numpy.ndarray(buffer=buf1, dtype='d', shape=(total_z, 3))
comm.Barrier()