def wrap_corr(pair, filename):
    r"""Compute the frequency-frequency covariance for a map_pair `pair`.

    The `(freq_cov, counts)` tuple returned by ``pair.freq_covariance()``
    is pickled to `filename`.

    Parameters
    ----------
    pair : map_pair
        pair of maps whose frequency covariance is calculated
    filename : str
        path of the output pickle file

    Notes
    -----
    Fixed: the old docstring claimed this "does the correlation"; the code
    saves the frequency covariance.  Also removed an unused
    ``name = current_process().name`` local.
    """
    (freq_cov, counts) = pair.freq_covariance()
    ft.save_pickle((freq_cov, counts), filename)
def calculate_correlation(self):
    r"""Find the freq-freq covariance and its SVD modes for each map pair.

    For every pair in ``self.pairlist`` the covariance of the two maps is
    computed (optionally after clipping/saturating the weight maps) and the
    resulting SVD info is pickled to ``<output_root>SVD_pair_<pair>.pkl``.

    Note that multiprocessing's map() is more elegant than Process,
    but fails for handing in complex map_pair objects
    """
    # NOTE(review): process_list is never used; presumably left over from a
    # multiprocessing version of this loop -- confirm before deleting.
    process_list = []
    for pairitem in self.pairlist:
        filename = self.output_root
        filename_svd = filename + "SVD_pair_%s.pkl" % pairitem
        # Deep-copy maps/weights so the clipping below cannot mutate the
        # arrays held by the map_pair objects.
        map1 = copy.deepcopy(np.array(self.pairs[pairitem].map1))
        map2 = copy.deepcopy(np.array(self.pairs[pairitem].map2))
        weight1 = copy.deepcopy(np.array(self.pairs[pairitem].noise_inv1))
        weight2 = copy.deepcopy(np.array(self.pairs[pairitem].noise_inv2))
        freqs1 = copy.deepcopy(self.pairs[pairitem].freq1)
        freqs2 = copy.deepcopy(self.pairs[pairitem].freq2)
        if self.params['clip_weight_percent'] is not None:
            print "Note: your are clipping the weight maps"
            percentile = self.params['clip_weight_percent']
            mask1 = self.define_weightmask(weight1,
                                           percentile=percentile)
            # NOTE(review): mask2 is computed (on weight2 truncated to
            # weight1's first-axis length) but never used below -- BOTH
            # weights are saturated with mask1.  Confirm whether sharing
            # one mask is intentional or mask2 was meant for weight2.
            mask2 = self.define_weightmask(weight2[:weight1.shape[0], ...],
                                           percentile=percentile)
            weight1 = self.saturate_weight(weight1, mask1)
            weight2 = self.saturate_weight(weight2, mask1)
        (freq_cov, counts) = find_modes.freq_covariance(
            map1, map2, weight1, weight2, freqs1, freqs2,
            no_weight=self.params['weighted_SVD'])
        # Keep as many modes as the smaller frequency list allows.
        n_modes = min(len(freqs1), len(freqs2))
        svd_info = find_modes.get_freq_svd_modes(freq_cov, n_modes)
        ft.save_pickle(svd_info, filename_svd)
def wrap_corr(pair, filename): r"""Do the correlation for a map_pair `pair`. Correlations in the `pair` (map_pair type) are saved to `filename` """ name = current_process().name print "starting at %s on pair %s => %s" % (name, time.asctime(), filename) (corr, counts) = pair.correlate(pair.lags, speedup=True) ft.save_pickle((corr, counts), filename) print "%s finished at %s" % (name, time.asctime())
def calculate_svd(self): r"""calculate the SVD of all pairs""" for pairitem in self.pairlist: filename = self.output_root filename_corr = filename + "foreground_corr_pair_%s.pkl" % pairitem filename_svd = filename + "SVD_pair_%s.pkl" % pairitem print filename_corr if os.access(filename_corr, os.F_OK): print "SVD loading corr. functions: " + filename (corr, counts) = ft.load_pickle(filename_corr) # (vals, modes1, modes2) svd_info = ce.get_freq_svd_modes(corr, len(self.freq_list)) ft.save_pickle(svd_info, filename_svd) else: print "ERROR: in SVD, correlation functions not loaded" sys.exit()
def execute(self):
    '''Clean the maps of foregrounds, save the results, and get the
    autocorrelation.

    Pipeline driver:
      1. load all maps and their noise inverses,
      2. form all unique map pairs (deep copies),
      3. unless `skip_fore_corr`, correlate each pair in a subprocess and
         SVD the foreground correlation (else load a saved SVD),
      4. subtract the leading SVD modes and save the cleaned data,
      5. correlate the cleaned pairs and form the mean/std correlation.
    '''
    params = self.params
    freq_list = sp.array(params['freq_list'], dtype=int)
    lags = sp.array(params['lags'])
    # Write parameter file.
    kiyopy.utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix=prefix)
    # Get the map data from file as well as the noise inverse.
    if len(params['file_middles']) == 1:
        # A single map is paired with itself (autocorrelation case).
        fmid_name = params['file_middles'][0]
        params['file_middles'] = (fmid_name, fmid_name)
    if len(params['file_middles']) >= 2:
        # Deal with multiple files.
        num_maps = len(params['file_middles'])
        maps = []
        noise_invs = []
        # Load all maps and noises once.
        for map_index in range(0, num_maps):
            map_file = (params['input_root'] +
                        params['file_middles'][map_index] +
                        params['input_end_map'])
            print "Loading map %d of %d." % (map_index + 1, num_maps)
            map_in = algebra.make_vect(algebra.load(map_file))
            maps.append(map_in)
            if not params["no_weights"]:
                noise_file = (params['input_root'] +
                              params['file_middles'][map_index] +
                              params['input_end_noise'])
                print "Loading noise %d of %d." % (map_index + 1, num_maps)
                # Memory-map the noise matrix and keep only its diagonal.
                noise_inv = algebra.make_mat(
                    algebra.open_memmap(noise_file, mode='r'))
                noise_inv = noise_inv.mat_diag()
            else:
                # No weights requested: use unit weights.
                noise_inv = algebra.ones_like(map_in)
            noise_invs.append(noise_inv)
        pairs = []
        # Make pairs with deepcopies to not make mutability mistakes.
        for map1_index in range(0, num_maps):
            for map2_index in range(0, num_maps):
                # Each unordered pair exactly once, no self-pairs.
                if (map2_index > map1_index):
                    map1 = copy.deepcopy(maps[map1_index])
                    map2 = copy.deepcopy(maps[map2_index])
                    noise_inv1 = copy.deepcopy(noise_invs[map1_index])
                    noise_inv2 = copy.deepcopy(noise_invs[map2_index])
                    pair = map_pair.MapPair(map1, map2,
                                            noise_inv1, noise_inv2,
                                            freq_list)
                    pair.lags = lags
                    pair.params = params
                    # Keep track of the names of maps in pairs so
                    # it knows what to save later.
                    pair.set_names(params['file_middles'][map1_index],
                                   params['file_middles'][map2_index])
                    pairs.append(pair)
        num_map_pairs = len(pairs)
        print "%d map pairs created from %d maps." % (len(pairs), num_maps)
    # Hold a reference in self.
    # NOTE(review): if params['file_middles'] is empty, `pairs` is never
    # bound and this raises NameError -- confirm callers always supply at
    # least one file middle.
    self.pairs = pairs
    # Get maps/ noise inv ready for running.
    if params["convolve"]:
        for pair in pairs:
            pair.degrade_resolution()
    if params['factorizable_noise']:
        for pair in pairs:
            pair.make_noise_factorizable()
    if params['sub_weighted_mean']:
        for pair in pairs:
            pair.subtract_weighted_mean()
    self.pairs = pairs
    # Since correlating takes so long, if you already have the svds
    # you can skip this first correlation [since that's all it's really
    # for and it is the same no matter how many modes you want].
    # Note: map_pairs will not have anything saved in 'fore_corr' if you
    # skip this correlation.
    if not params['skip_fore_corr']:
        # Correlate the maps with multiprocessing.  Note that the
        # correlations are saved to file separately then loaded in
        # together because that's (one way) how multiprocessing works.
        fore_pairs = []
        processes_list = []
        for pair_index in range(0, num_map_pairs):
            # Calls 1 multiproc (which governs the correlating) for each
            # pair on a new CPU so you can have all pairs working at once.
            multi = multiprocessing.Process(target=multiproc,
                                            args=([pairs[pair_index],
                                                   params['output_root'],
                                                   pair_index, False]))
            processes_list.append(multi)
            multi.start()
        # Waits for all correlations to finish before continuing.
        # (Busy-wait polling; Process.join would also work.)
        while True in [multi.is_alive() for multi in processes_list]:
            print "processing"
            time.sleep(5)
        # just to be safe
        time.sleep(1)
        # more concise call, but multiprocessing does not behave well with
        # complex objects...........
        #runlist = [(pair_index,
        #            params['output_root'],
        #            False) for
        #            pair_index in range(0, num_map_pairs)]
        #pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        #pool.map(self.multiproc, runlist)
        # Load the correlations and save them to each pair. The pairs that
        # got passed to multiproc are not the same ones as ones in
        # self.pairs, so this must be done to have actual values.
        print "Loading map pairs back into program."
        file_name = params['output_root']
        file_name += "map_pair_for_freq_slices_fore_corr_"
        for count in range(0, num_map_pairs):
            print "Loading correlation for pair %d" % (count)
            # NOTE(review): pickle file opened in text mode "r"; binary
            # "rb" would be safer -- confirm the writer used text mode.
            pickle_handle = open(file_name + str(count) + ".pkl", "r")
            correlate_results = cPickle.load(pickle_handle)
            pairs[count].fore_corr = correlate_results[0]
            pairs[count].fore_counts = correlate_results[1]
            fore_pairs.append(pairs[count])
            pickle_handle.close()
        self.fore_pairs = copy.deepcopy(fore_pairs)
        # With this, you do not need fore_pairs anymore.
        self.pairs = copy.deepcopy(fore_pairs)
        pairs = self.pairs
        # Get foregrounds.
        # svd_info_list keeps track of all of the modes of all maps in
        # all pairs.  This means if you want to subract a different number
        # of modes for the same maps/noises/frequencies, you have the
        # modes already saved and do not need to run the first correlation
        # again.
        svd_info_list = []
        for pair in pairs:
            vals, modes1, modes2 = cf.get_freq_svd_modes(pair.fore_corr,
                                                         len(freq_list))
            pair.vals = vals
            # Save ALL of the modes for reference.
            pair.all_modes1 = modes1
            pair.all_modes2 = modes2
            svd_info = (vals, modes1, modes2)
            svd_info_list.append(svd_info)
            # Save only the modes you want to subtract.
            n_modes = params['modes']
            pair.modes1 = modes1[:n_modes]
            pair.modes2 = modes2[:n_modes]
        self.svd_info_list = svd_info_list
        self.pairs = pairs
        if params['save_svd_info']:
            ft.save_pickle(self.svd_info_list, params['svd_file'])
    else:
        # The first correlation and svd has been skipped.
        # This means you already have the modes so you can just load
        # them from file.
        self.svd_info_list = ft.load_pickle(params['svd_file'])
        # Set the svd info to the pairs.
        for i in range(0, len(pairs)):
            svd_info = self.svd_info_list[i]
            pairs[i].vals = svd_info[0]
            pairs[i].all_modes1 = svd_info[1]
            pairs[i].all_modes2 = svd_info[2]
            n_modes = params['modes']
            pairs[i].modes1 = svd_info[1][:n_modes]
            pairs[i].modes2 = svd_info[2][:n_modes]
        self.pairs = pairs
    # Subtract foregrounds.
    for pair_index in range(0, len(pairs)):
        pairs[pair_index].subtract_frequency_modes(
            pairs[pair_index].modes1, pairs[pair_index].modes2)
    # Save cleaned clean maps, cleaned noises, and modes.
    self.save_data(save_maps=params['save_maps'],
                   save_noises=params['save_noises'],
                   save_modes=params['save_modes'])
    # Finish if this was just first pass.
    if params['first_pass_only']:
        self.pairs = pairs
        return
    # Correlate the cleaned maps.
    # Here we could calculate the power spectrum instead eventually.
    temp_pair_list = []
    processes_list = []
    for pair_index in range(0, num_map_pairs):
        multi = multiprocessing.Process(target=multiproc,
                                        args=([pairs[pair_index],
                                               params['output_root'],
                                               pair_index, True]))
        processes_list.append(multi)
        multi.start()
    while True in [multi.is_alive() for multi in processes_list]:
        print "processing"
        time.sleep(5)
    # just to be safe
    time.sleep(1)
    # ugh, would really rathter use implementation below except
    # multiprocessing does not behave.................
    #runlist = [(pairs[pair_index],
    #            params['output_root'],
    #            pair_index, True) for
    #            pair_index in range(0, num_map_pairs)]
    #pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    #pool.map(multiproc, runlist)
    print "Loading map pairs back into program."
    file_name = params['output_root']
    file_name += "map_pair_for_freq_slices_corr_"
    for count in range(0, num_map_pairs):
        print "Loading correlation for pair %d" % (count)
        pickle_handle = open(file_name + str(count) + ".pkl", "r")
        correlate_results = cPickle.load(pickle_handle)
        pairs[count].corr = correlate_results[0]
        pairs[count].counts = correlate_results[1]
        temp_pair_list.append(pairs[count])
        pickle_handle.close()
    self.pairs = copy.deepcopy(temp_pair_list)
    # Get the average correlation and its standard deviation.
    corr_list = []
    for pair in self.pairs:
        corr_list.append(pair.corr)
    self.corr_final, self.corr_std = cf.get_corr_and_std_3d(corr_list)
    if params['pickle_slices']:
        ft.save_pickle(self, self.params['output_root'] +
                       'New_Slices_object.pkl')
    return
def execute(self):
    '''Clean the maps of foregrounds, save the results, and get the
    autocorrelation.

    NOTE(review): this is a byte-for-byte duplicate of the execute()
    defined earlier in this file; if both live in the same class, this
    later definition silently overrides the first.  Consider deleting one.

    Pipeline driver:
      1. load all maps and their noise inverses,
      2. form all unique map pairs (deep copies),
      3. unless `skip_fore_corr`, correlate each pair in a subprocess and
         SVD the foreground correlation (else load a saved SVD),
      4. subtract the leading SVD modes and save the cleaned data,
      5. correlate the cleaned pairs and form the mean/std correlation.
    '''
    params = self.params
    freq_list = sp.array(params['freq_list'], dtype=int)
    lags = sp.array(params['lags'])
    # Write parameter file.
    kiyopy.utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix=prefix)
    # Get the map data from file as well as the noise inverse.
    if len(params['file_middles']) == 1:
        # A single map is paired with itself (autocorrelation case).
        fmid_name = params['file_middles'][0]
        params['file_middles'] = (fmid_name, fmid_name)
    if len(params['file_middles']) >= 2:
        # Deal with multiple files.
        num_maps = len(params['file_middles'])
        maps = []
        noise_invs = []
        # Load all maps and noises once.
        for map_index in range(0, num_maps):
            map_file = (params['input_root'] +
                        params['file_middles'][map_index] +
                        params['input_end_map'])
            print "Loading map %d of %d." % (map_index + 1, num_maps)
            map_in = algebra.make_vect(algebra.load(map_file))
            maps.append(map_in)
            if not params["no_weights"]:
                noise_file = (params['input_root'] +
                              params['file_middles'][map_index] +
                              params['input_end_noise'])
                print "Loading noise %d of %d." % (map_index + 1, num_maps)
                # Memory-map the noise matrix and keep only its diagonal.
                noise_inv = algebra.make_mat(
                    algebra.open_memmap(noise_file, mode='r'))
                noise_inv = noise_inv.mat_diag()
            else:
                # No weights requested: use unit weights.
                noise_inv = algebra.ones_like(map_in)
            noise_invs.append(noise_inv)
        pairs = []
        # Make pairs with deepcopies to not make mutability mistakes.
        for map1_index in range(0, num_maps):
            for map2_index in range(0, num_maps):
                # Each unordered pair exactly once, no self-pairs.
                if (map2_index > map1_index):
                    map1 = copy.deepcopy(maps[map1_index])
                    map2 = copy.deepcopy(maps[map2_index])
                    noise_inv1 = copy.deepcopy(noise_invs[map1_index])
                    noise_inv2 = copy.deepcopy(noise_invs[map2_index])
                    pair = map_pair.MapPair(map1, map2,
                                            noise_inv1, noise_inv2,
                                            freq_list)
                    pair.lags = lags
                    pair.params = params
                    # Keep track of the names of maps in pairs so
                    # it knows what to save later.
                    pair.set_names(params['file_middles'][map1_index],
                                   params['file_middles'][map2_index])
                    pairs.append(pair)
        num_map_pairs = len(pairs)
        print "%d map pairs created from %d maps." % (len(pairs), num_maps)
    # Hold a reference in self.
    # NOTE(review): if params['file_middles'] is empty, `pairs` is never
    # bound and this raises NameError -- confirm callers always supply at
    # least one file middle.
    self.pairs = pairs
    # Get maps/ noise inv ready for running.
    if params["convolve"]:
        for pair in pairs:
            pair.degrade_resolution()
    if params['factorizable_noise']:
        for pair in pairs:
            pair.make_noise_factorizable()
    if params['sub_weighted_mean']:
        for pair in pairs:
            pair.subtract_weighted_mean()
    self.pairs = pairs
    # Since correlating takes so long, if you already have the svds
    # you can skip this first correlation [since that's all it's really
    # for and it is the same no matter how many modes you want].
    # Note: map_pairs will not have anything saved in 'fore_corr' if you
    # skip this correlation.
    if not params['skip_fore_corr']:
        # Correlate the maps with multiprocessing.  Note that the
        # correlations are saved to file separately then loaded in
        # together because that's (one way) how multiprocessing works.
        fore_pairs = []
        processes_list = []
        for pair_index in range(0, num_map_pairs):
            # Calls 1 multiproc (which governs the correlating) for each
            # pair on a new CPU so you can have all pairs working at once.
            multi = multiprocessing.Process(target=multiproc,
                                            args=([pairs[pair_index],
                                                   params['output_root'],
                                                   pair_index, False]))
            processes_list.append(multi)
            multi.start()
        # Waits for all correlations to finish before continuing.
        # (Busy-wait polling; Process.join would also work.)
        while True in [multi.is_alive() for multi in processes_list]:
            print "processing"
            time.sleep(5)
        # just to be safe
        time.sleep(1)
        # more concise call, but multiprocessing does not behave well with
        # complex objects...........
        #runlist = [(pair_index,
        #            params['output_root'],
        #            False) for
        #            pair_index in range(0, num_map_pairs)]
        #pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        #pool.map(self.multiproc, runlist)
        # Load the correlations and save them to each pair. The pairs that
        # got passed to multiproc are not the same ones as ones in
        # self.pairs, so this must be done to have actual values.
        print "Loading map pairs back into program."
        file_name = params['output_root']
        file_name += "map_pair_for_freq_slices_fore_corr_"
        for count in range(0, num_map_pairs):
            print "Loading correlation for pair %d" % (count)
            # NOTE(review): pickle file opened in text mode "r"; binary
            # "rb" would be safer -- confirm the writer used text mode.
            pickle_handle = open(file_name + str(count) + ".pkl", "r")
            correlate_results = cPickle.load(pickle_handle)
            pairs[count].fore_corr = correlate_results[0]
            pairs[count].fore_counts = correlate_results[1]
            fore_pairs.append(pairs[count])
            pickle_handle.close()
        self.fore_pairs = copy.deepcopy(fore_pairs)
        # With this, you do not need fore_pairs anymore.
        self.pairs = copy.deepcopy(fore_pairs)
        pairs = self.pairs
        # Get foregrounds.
        # svd_info_list keeps track of all of the modes of all maps in
        # all pairs.  This means if you want to subract a different number
        # of modes for the same maps/noises/frequencies, you have the
        # modes already saved and do not need to run the first correlation
        # again.
        svd_info_list = []
        for pair in pairs:
            vals, modes1, modes2 = cf.get_freq_svd_modes(pair.fore_corr,
                                                         len(freq_list))
            pair.vals = vals
            # Save ALL of the modes for reference.
            pair.all_modes1 = modes1
            pair.all_modes2 = modes2
            svd_info = (vals, modes1, modes2)
            svd_info_list.append(svd_info)
            # Save only the modes you want to subtract.
            n_modes = params['modes']
            pair.modes1 = modes1[:n_modes]
            pair.modes2 = modes2[:n_modes]
        self.svd_info_list = svd_info_list
        self.pairs = pairs
        if params['save_svd_info']:
            ft.save_pickle(self.svd_info_list, params['svd_file'])
    else:
        # The first correlation and svd has been skipped.
        # This means you already have the modes so you can just load
        # them from file.
        self.svd_info_list = ft.load_pickle(params['svd_file'])
        # Set the svd info to the pairs.
        for i in range(0, len(pairs)):
            svd_info = self.svd_info_list[i]
            pairs[i].vals = svd_info[0]
            pairs[i].all_modes1 = svd_info[1]
            pairs[i].all_modes2 = svd_info[2]
            n_modes = params['modes']
            pairs[i].modes1 = svd_info[1][:n_modes]
            pairs[i].modes2 = svd_info[2][:n_modes]
        self.pairs = pairs
    # Subtract foregrounds.
    for pair_index in range(0, len(pairs)):
        pairs[pair_index].subtract_frequency_modes(
            pairs[pair_index].modes1, pairs[pair_index].modes2)
    # Save cleaned clean maps, cleaned noises, and modes.
    self.save_data(save_maps=params['save_maps'],
                   save_noises=params['save_noises'],
                   save_modes=params['save_modes'])
    # Finish if this was just first pass.
    if params['first_pass_only']:
        self.pairs = pairs
        return
    # Correlate the cleaned maps.
    # Here we could calculate the power spectrum instead eventually.
    temp_pair_list = []
    processes_list = []
    for pair_index in range(0, num_map_pairs):
        multi = multiprocessing.Process(target=multiproc,
                                        args=([pairs[pair_index],
                                               params['output_root'],
                                               pair_index, True]))
        processes_list.append(multi)
        multi.start()
    while True in [multi.is_alive() for multi in processes_list]:
        print "processing"
        time.sleep(5)
    # just to be safe
    time.sleep(1)
    # ugh, would really rathter use implementation below except
    # multiprocessing does not behave.................
    #runlist = [(pairs[pair_index],
    #            params['output_root'],
    #            pair_index, True) for
    #            pair_index in range(0, num_map_pairs)]
    #pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    #pool.map(multiproc, runlist)
    print "Loading map pairs back into program."
    file_name = params['output_root']
    file_name += "map_pair_for_freq_slices_corr_"
    for count in range(0, num_map_pairs):
        print "Loading correlation for pair %d" % (count)
        pickle_handle = open(file_name + str(count) + ".pkl", "r")
        correlate_results = cPickle.load(pickle_handle)
        pairs[count].corr = correlate_results[0]
        pairs[count].counts = correlate_results[1]
        temp_pair_list.append(pairs[count])
        pickle_handle.close()
    self.pairs = copy.deepcopy(temp_pair_list)
    # Get the average correlation and its standard deviation.
    corr_list = []
    for pair in self.pairs:
        corr_list.append(pair.corr)
    self.corr_final, self.corr_std = cf.get_corr_and_std_3d(corr_list)
    if params['pickle_slices']:
        ft.save_pickle(self, self.params['output_root'] +
                       'New_Slices_object.pkl')
    return