def clean_file(self, middle, modes):
    """Clean every data block of one file with `clean_data` and write it out.

    Parameters
    ----------
    middle : str
        Middle part of the file name.  It is placed between
        ``input_root``/``input_end`` to locate the input file and between
        ``output_root``/``output_end`` to name the output file.
    modes : array
        Indexed as ``modes[ii, ...]`` where `ii` counts the bands being
        processed; each slice is handed unchanged to `clean_data`.
    """
    params = self.params
    file_name = params['input_root'] + middle + params['input_end']
    # Output parameters.
    Writer = core.fitsGBT.Writer(feedback=self.feedback)
    out_filename = params['output_root'] + middle + params['output_end']
    Reader = core.fitsGBT.Reader(file_name, feedback=self.feedback)
    # An empty "IFs" parameter means "process every band in the file".
    band_inds = params["IFs"]
    if not band_inds:
        band_inds = range(len(Reader.IF_set))
    # Number of bands we actually process.
    n_bands_proc = len(band_inds)
    for ii in range(n_bands_proc):
        # NOTE(review): the band is read by its position `ii`, not by
        # `band_inds[ii]`; the two differ when params["IFs"] selects a
        # subset of the bands.  Confirm which one is intended.
        Blocks = Reader.read((), ii)
        this_band_modes = modes[ii, ...]
        for Data in Blocks:
            clean_data(Data, this_band_modes)
            Writer.add_data(Data)
    # Write the data back out.
    utils.mkparents(out_filename)
    Writer.write(out_filename)
def execute(self):
    """Process all data.

    Template driver: makes sure the output directory exists, saves a copy
    of the input parameters next to the output, then opens every input
    file and demonstrates the different ways of reading DataBlocks.
    """
    # Every input parameter is available through this dictionary.
    settings = self.params
    out_root = settings['output_root']
    # Create the parent directories of the output if they are missing.
    utils.mkparents(out_root)
    # Keep a record of the parameters so the run can be inspected later.
    parse_ini.write_params(settings, out_root + 'params.ini',
                           prefix=self.prefix)

    for file_middle in settings['file_middles']:
        input_fname = (settings['input_root'] + file_middle
                       + settings['input_end'])
        # The reader pulls DataBlock objects out of a fits file.
        Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
        # Examples of reading DataBlock objects:
        # a single block (scan 0, IF 0) ...
        block_scan0_if0 = Reader.read(scans=0, IFs=0)
        # ... another single block (scan 1, IF 0) ...
        block_scan1_if0 = Reader.read(scans=1, IFs=0)
        # ... a list of blocks for a few scans ...
        blocks_some_scans_if0 = Reader.read(scans=(1, 2, 3), IFs=0)
        # ... and every block in the file (empty tuple means "all").
        blocks_everything = Reader.read(scans=(), IFs=())
def process_file(self, file_ind):
    """Apply `self.action` to every data block of one file and write it out.

    Parameters
    ----------
    file_ind : int
        Index into ``params["file_middles"]`` selecting the file.  The
        input and output paths are built from the matching root/end
        parameters.
    """
    params = self.params
    file_middle = params["file_middles"][file_ind]
    input_fname = params["input_root"] + file_middle + params["input_end"]
    output_fname = params["output_root"] + file_middle + params["output_end"]
    Writer = fitsGBT.Writer(feedback=self.feedback)
    Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)

    # An empty or missing scan list means "all scans".  The None test has
    # to come first, otherwise len(None) raises before it is reached.
    scan_inds = params["scans"]
    if scan_inds is None or len(scan_inds) == 0:
        scan_inds = range(len(Reader.scan_set))

    # Loop over scans.
    for thisscan in scan_inds:
        Blocks = Reader.read(thisscan, params["IFs"], force_tuple=True)
        for Data in Blocks:
            self.action(Data, Writer)
        # Release the blocks of this scan before reading the next one.
        del Blocks
        gc.collect()

    # Go to a new line if we are printing statistics.
    if hasattr(self, "feedback_title") and self.feedback > 1:
        print("")
    # Finally write the data back to file.
    utils.mkparents(output_fname)
    Writer.write(output_fname)
def mpiexecute(self, n_processes=1):
    """
    Process all data with MPI

    To start with MPI, you need to change manager.py calling mpiexecute
    instead of execute. and do,
    $ mpirun -np 9 --bynode python manager.py pipeline.pipe

    Files are distributed round-robin: the rank `r` of `size` ranks
    processes the file indices ``r, r + size, r + 2 * size, ...``.
    `n_processes` is accepted for interface compatibility and is not used.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    params = self.params
    file_middles = params["file_middles"]
    # Only rank 0 creates the output directory; the barrier below makes
    # the other ranks wait for it.  With an empty file list there is
    # nothing to create, and indexing [0] would raise on rank 0 while all
    # other ranks hang in the barrier.
    if rank == 0 and len(file_middles) > 0:
        output_fname = (params["output_root"] + file_middles[0]
                        + params["output_end"])
        utils.mkparents(output_fname)
    comm.barrier()

    for file_ind in range(rank, len(file_middles), size):
        self.process_file(file_ind)

    # Wait until every rank has finished its share.
    comm.barrier()
def execute(self):
    """Process all data.

    Template driver: makes sure the output directory exists, saves a copy
    of the input parameters next to the output, then opens every input
    file and demonstrates the different ways of reading DataBlocks.
    """
    # Every input parameter is available through this dictionary.
    settings = self.params
    out_root = settings['output_root']
    # Create the parent directories of the output if they are missing.
    utils.mkparents(out_root)
    # Keep a record of the parameters so the run can be inspected later.
    parse_ini.write_params(settings, out_root + 'params.ini',
                           prefix=self.prefix)

    for file_middle in settings['file_middles']:
        input_fname = (settings['input_root'] + file_middle
                       + settings['input_end'])
        # The reader pulls DataBlock objects out of a fits file.
        Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
        # Examples of reading DataBlock objects:
        # a single block (scan 0, IF 0) ...
        block_scan0_if0 = Reader.read(scans=0, IFs=0)
        # ... another single block (scan 1, IF 0) ...
        block_scan1_if0 = Reader.read(scans=1, IFs=0)
        # ... a list of blocks for a few scans ...
        blocks_some_scans_if0 = Reader.read(scans=(1, 2, 3), IFs=0)
        # ... and every block in the file (empty tuple means "all").
        blocks_everything = Reader.read(scans=(), IFs=())
def process_file(self, file_ind) : params = self.params file_middle = params['file_middles'][file_ind] input_fname = (params['input_root'] + file_middle + params['input_end']) sub_input_fname = (params['subtracted_input_root'] + file_middle + params['input_end']) output_fname = (params['output_root'] + file_middle + params['output_end']) sub_output_fname = (params['subtracted_output_root'] + file_middle + params['output_end']) Writer = fitsGBT.Writer(feedback=self.feedback) SubWriter = fitsGBT.Writer(feedback=self.feedback) # Read in the data, and loop over data blocks. Reader = fitsGBT.Reader(input_fname, feedback=self.feedback) SubReader = fitsGBT.Reader(sub_input_fname, feedback=self.feedback) if (sp.any(Reader.scan_set != SubReader.scan_set) or sp.any(Reader.IF_set != SubReader.IF_set)) : raise ce.DataError("IFs and scans don't match signal subtracted" " data.") # Get the number of scans if asked for all of them. scan_inds = params['scans'] if len(scan_inds) == 0 or scan_inds is None : scan_inds = range(len(Reader.scan_set)) if_inds = params['IFs'] if len(if_inds) == 0 or scan_inds is None : if_inds = range(len(Reader.IF_set)) if self.feedback > 1 : print "New flags each block:", # Loop over scans and IFs for thisscan in scan_inds : for thisIF in if_inds : Data = Reader.read(thisscan, thisIF) SubData = SubReader.read(thisscan, thisIF) n_flags = ma.count_masked(Data.data) # Now do the flagging. flag(Data, SubData, params['thres']) Data.add_history("Reflaged for outliers.", ("Used file: " + utils.abbreviate_file_path(sub_input_fname),)) SubData.add_history("Reflaged for outliers.") Writer.add_data(Data) SubWriter.add_data(SubData) # Report the numbe of new flags. n_flags = ma.count_masked(Data.data) - n_flags if self.feedback > 1 : print n_flags, if self.feedback > 1 : print '' # Finally write the data back to file. utils.mkparents(output_fname) utils.mkparents(sub_output_fname) Writer.write(output_fname) SubWriter.write(sub_output_fname)
def execute(self, n_processes=1):
    """Process all data.

    With ``n_processes > 1`` the files are handled by subprocesses running
    in parallel, one file per subprocess.  Besides the speed-up this keeps
    memory leaks (such as the bad one in pyfits) from accumulating.
    """
    params = self.params
    out_root = params['output_root']
    # Make parent directories if need be and record the parameters.
    utils.mkparents(out_root)
    parse_ini.write_params(params, out_root + 'params.ini',
                           prefix=self.prefix)
    n_workers = n_processes - 1
    n_files = len(params['file_middles'])

    if n_workers <= 0:
        # Single process mode: handle the files one after another.
        for file_ind in range(n_files):
            self.process_file(file_ind)
        return
    if n_workers > 32:
        raise ValueError("Asked for a rediculouse number of processes: "
                         + str(n_workers) + ". Limit is 32.")

    # One fresh process per file; an mp.Pool is avoided on purpose since
    # it would reuse processes and with them the pyfits memory leak.
    # Each slot holds the process currently occupying it.
    slots = [None] * n_workers
    for ii in xrange(n_files + n_workers):
        slot = ii % n_workers
        if ii >= n_workers:
            # The slot is occupied: wait for its process and check it.
            slots[slot].join()
            if slots[slot].exitcode != 0:
                raise RuntimeError("A thread failed with exit code: "
                                   + str(slots[slot].exitcode))
        if ii < n_files:
            slots[slot] = mp.Process(target=self.process_file,
                                     args=(ii,))
            slots[slot].start()
def execute(self, n_processes=1):
    """Process all data.

    If n_processes > 1 then this function spawns a bunch of subprocesses
    in parallel, each of which deals with a single data file.  This both
    speeds things up and avoids any memory leaks (like the bad one in
    pyfits).

    Raises
    ------
    ValueError
        If more than 32 subprocesses are requested.
    RuntimeError
        If a subprocess finishes with a non-zero exit code.
    """
    params = self.params
    # Make parent directories if need be.
    utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix=self.prefix)
    n_new = n_processes - 1
    n_files = len(params['file_middles'])
    # Loop over the files to process.
    if n_new <= 0:
        # Single process mode.
        for file_ind in range(n_files):
            self.process_file(file_ind)
    elif n_new > 32:
        raise ValueError("Asked for a rediculouse number of processes: "
                         + str(n_new) + ". Limit is 32.")
    else:
        # Spawn a bunch of new processes each with a single file to
        # analyse.
        # Can't use an mp.Pool here because we don't want to reuse
        # processes due to pyfits memory leak.
        process_list = [None] * n_new
        for ii in xrange(n_files + n_new):
            slot = ii % n_new
            # From ii == n_new onwards every slot visited is occupied.
            # The comparison must be >=: with > the process in slot 0 is
            # replaced at ii == n_new without being joined or checked.
            if ii >= n_new:
                process_list[slot].join()
                if process_list[slot].exitcode != 0:
                    raise RuntimeError("A thread failed with exit code: "
                                       + str(process_list[slot].exitcode))
            if ii < n_files:
                process_list[slot] = mp.Process(
                    target=self.process_file, args=(ii,))
                process_list[slot].start()
def process_file(self, file_ind): """Process on file from the list to be processed based on the passed index. """ self.file_ind = file_ind params = self.params file_middle = params['file_middles'][file_ind] input_fname = (params['input_root'] + file_middle + params['input_end']) output_fname = (params['output_root'] + file_middle + params['output_end']) Writer = fitsGBT.Writer(feedback=self.feedback) # Read in the data, and loop over data blocks. Reader = fitsGBT.Reader(input_fname, feedback=self.feedback) if hasattr(self, 'feedback_title') and self.feedback > 1: print self.feedback_title, # Get the number of scans if asked for all of them. scan_inds = params['scans'] if len(scan_inds) == 0 or scan_inds is None: scan_inds = range(len(Reader.scan_set)) # Loop over scans. for thisscan in scan_inds: Blocks = Reader.read(thisscan, params['IFs'], force_tuple=True) # Function that loops over DataBlocks within a scan. NewBlocks = self.scan_action(Blocks) del Blocks Writer.add_data(NewBlocks) # Go to a new line if we are printing statistics. if hasattr(self, 'feedback_title') and self.feedback > 1: print '' # Finally write the data back to file. utils.mkparents(output_fname) Writer.write(output_fname)
def process_file(self, file_ind) : """Process on file from the list to be processed based on the passed index. """ self.file_ind = file_ind params = self.params file_middle = params['file_middles'][file_ind] input_fname = (params['input_root'] + file_middle + params['input_end']) output_fname = (params['output_root'] + file_middle + params['output_end']) Writer = fitsGBT.Writer(feedback=self.feedback) # Read in the data, and loop over data blocks. Reader = fitsGBT.Reader(input_fname, feedback=self.feedback) if hasattr(self, 'feedback_title') and self.feedback > 1: print self.feedback_title, # Get the number of scans if asked for all of them. scan_inds = params['scans'] if len(scan_inds) == 0 or scan_inds is None : scan_inds = range(len(Reader.scan_set)) # Loop over scans. for thisscan in scan_inds : Blocks = Reader.read(thisscan, params['IFs'], force_tuple=True) # Function that loops over DataBlocks within a scan. NewBlocks = self.scan_action(Blocks) del Blocks Writer.add_data(NewBlocks) # Go to a new line if we are printing statistics. if hasattr(self, 'feedback_title') and self.feedback > 1: print '' # Finally write the data back to file. utils.mkparents(output_fname) Writer.write(output_fname)
def clean_file(self, middle, modes):
    """Clean every data block of one file with `clean_data` and write it out.

    Parameters
    ----------
    middle : str
        Middle part of the file name.  It is placed between
        ``input_root``/``input_end`` to locate the input file and between
        ``output_root``/``output_end`` to name the output file.
    modes : array
        Indexed as ``modes[ii, ...]`` where `ii` counts the bands being
        processed; each slice is handed unchanged to `clean_data`.
    """
    params = self.params
    file_name = params['input_root'] + middle + params['input_end']
    # Output parameters.
    Writer = core.fitsGBT.Writer(feedback=self.feedback)
    out_filename = params['output_root'] + middle + params['output_end']
    Reader = core.fitsGBT.Reader(file_name, feedback=self.feedback)
    # An empty "IFs" parameter means "process every band in the file".
    band_inds = params["IFs"]
    if not band_inds:
        band_inds = range(len(Reader.IF_set))
    # Number of bands we actually process.
    n_bands_proc = len(band_inds)
    for ii in range(n_bands_proc):
        # NOTE(review): the band is read by its position `ii`, not by
        # `band_inds[ii]`; the two differ when params["IFs"] selects a
        # subset of the bands.  Confirm which one is intended.
        Blocks = Reader.read((), ii)
        this_band_modes = modes[ii, ...]
        for Data in Blocks:
            clean_data(Data, this_band_modes)
            Writer.add_data(Data)
    # Write the data back out.
    utils.mkparents(out_filename)
    Writer.write(out_filename)
def process_file(self, file_ind): params = self.params file_middle = params['file_middles'][file_ind] input_fname = (params['input_root'] + file_middle + params['input_end']) sub_input_fname = (params['subtracted_input_root'] + file_middle + params['input_end']) output_fname = (params['output_root'] + file_middle + params['output_end']) sub_output_fname = (params['subtracted_output_root'] + file_middle + params['output_end']) Writer = fitsGBT.Writer(feedback=self.feedback) SubWriter = fitsGBT.Writer(feedback=self.feedback) # Read in the data, and loop over data blocks. Reader = fitsGBT.Reader(input_fname, feedback=self.feedback) SubReader = fitsGBT.Reader(sub_input_fname, feedback=self.feedback) if (sp.any(Reader.scan_set != SubReader.scan_set) or sp.any(Reader.IF_set != SubReader.IF_set)): raise ce.DataError("IFs and scans don't match signal subtracted" " data.") # Get the number of scans if asked for all of them. scan_inds = params['scans'] if len(scan_inds) == 0 or scan_inds is None: scan_inds = range(len(Reader.scan_set)) if_inds = params['IFs'] if len(if_inds) == 0 or scan_inds is None: if_inds = range(len(Reader.IF_set)) if self.feedback > 1: print "New flags each block:", # Loop over scans and IFs for thisscan in scan_inds: for thisIF in if_inds: Data = Reader.read(thisscan, thisIF) SubData = SubReader.read(thisscan, thisIF) # Make sure they have agreeing masks to start. SubData.data[ma.getmaskarray(Data.data)] = ma.masked Data.data[ma.getmaskarray(SubData.data)] = ma.masked # Get initial number of flags. n_flags = ma.count_masked(Data.data) # Now do the flagging. flag(Data, SubData, params['thres'], params['max_noise_factor'], params['smooth_modes_subtract'], params['filter_type']) Data.add_history( "Reflaged for outliers.", ("Used file: " + utils.abbreviate_file_path(sub_input_fname), )) SubData.add_history("Reflaged for outliers.") Writer.add_data(Data) SubWriter.add_data(SubData) # Report the number of new flags. 
n_flags = ma.count_masked(Data.data) - n_flags if self.feedback > 1: print n_flags, if self.feedback > 1: print '' # Finally write the data back to file. utils.mkparents(output_fname) utils.mkparents(sub_output_fname) Writer.write(output_fname) SubWriter.write(sub_output_fname)
def execute(self, n_processes=1):
    """Create the subtracted-output directory, then run the base execute.

    The regular output directory is left to `BaseSingle.execute`; only the
    extra ``subtracted_output_root`` location is prepared here.
    """
    subtracted_root = self.params['subtracted_output_root']
    utils.mkparents(subtracted_root)
    base_single.BaseSingle.execute(self, n_processes)
def execute(self, nprocesses=1):
    """Convert the requested scans, writing one output fits file per group.

    For every not yet handled scan in ``params["scans"]`` the GO fits file
    is looked up through the scan log.  With ``combine_map_scans`` set,
    all scans of the same procedure are grouped into one output file,
    otherwise each scan gets its own.  Scans are read by `self.processfile`
    in subprocesses (at most `nprocesses` at a time) that send their
    result back over a pipe; the collected blocks are written by
    `out_write`, also in a subprocess.

    Raises
    ------
    ce.DataError
        If a subprocess reports ``-1`` for a scan.
    """
    params = self.params
    scans = list(params["scans"])
    # Make sure that the output directory exists.
    utils.mkparents(params["output_root"])

    # Read in the Scan Log fits file.
    log_dir = params["fits_log_dir"]
    scan_log_list = pyfits.open(log_dir + "/ScanLog.fits", "readonly")
    # The project session is the last '_' separated piece of PROJID.
    session = scan_log_list[0].header["PROJID"].split('_')[-1]
    scan_log = scan_log_list[1].data
    self.scan_log = scan_log

    # Scans already processed; a scan may be covered by the map of an
    # earlier one.
    finished_scans = []
    n_procs = nprocesses
    for initial_scan in scans:
        if initial_scan in finished_scans:
            continue
        self.initial_scan = initial_scan

        # Open the go fits file.
        is_this_scan = scan_log.field('SCAN') == initial_scan
        scan_log_files = scan_log.field('FILEPATH')[is_this_scan]
        go_file = log_dir + get_filename_from_key(scan_log_files, "GO")
        go_hdu = pyfits.open(go_file)[0].header

        # From the go information get the source and the scan type.
        source_name = go_hdu["OBJECT"].strip()
        self.proceedure = go_hdu["PROCNAME"].strip().lower()

        if params["combine_map_scans"]:
            # Work out all the scans belonging to the same map.
            self.n_scans_proc = go_hdu["PROCSIZE"]
            # Which scan this is of the sequence (1 indexed).
            self.initial_scan_ind = go_hdu["PROCSEQN"]
            first_scan = 1 - self.initial_scan_ind + initial_scan
            scans_this_file = list(
                sp.arange(self.n_scans_proc, dtype=int) + first_scan)
        else:
            scans_this_file = [initial_scan]
        finished_scans.extend(scans_this_file)

        # All the data that will be saved to a single fits file.
        Block_list = []
        n_scans = len(scans_this_file)
        procs = [None] * n_procs
        pipes = [None] * n_procs
        # Pipelined loop: iteration ii starts scan ii and, n_procs
        # iterations later, the same slot is collected again.
        for ii in range(n_scans + n_procs):
            slot = ii % n_procs
            if ii >= n_procs:
                scan = scans_this_file[ii - n_procs]
                if scan not in params["blacklist"]:
                    Data = pipes[slot].recv()
                    procs[slot].join()
                    # Store our processed data.
                    if Data == -1:
                        # Scan proceedure aborted.
                        message = ("Scan proceedures do not agree."
                                   " Perhase a scan was aborted. Scans: "
                                   + str(scan) + ", " + str(initial_scan)
                                   + " in directory: " + log_dir)
                        raise ce.DataError(message)
                    elif Data is None:
                        message = ("Missing or corrupted psrfits file."
                                   " Scan: " + str(scan)
                                   + " file roots: "
                                   + str(params["guppi_input_roots"]))
                        warnings.warn(message)
                    else:
                        Block_list.append(Data)
            if ii < n_scans:
                scan = scans_this_file[ii]
                # Reading the guppi fits file happens in a separate
                # process because of a memory leak in pyfits; this also
                # allows parallelization.
                if scan not in params["blacklist"]:
                    # The pipe over which the data comes back.
                    P_here, P_far = mp.Pipe()
                    pipes[slot] = P_here
                    # Start the forked process.
                    worker = mp.Process(target=self.processfile,
                                        args=(scan, P_far))
                    worker.start()
                    procs[slot] = worker
        # End loop over scans (input files).

        # Now write the list of scans to disk.
        if len(scans_this_file) > 1:
            str_scan_range = (str(scans_this_file[0]) + '-'
                              + str(scans_this_file[-1]))
        else:
            str_scan_range = str(scans_this_file[0])
        out_file = (params["output_root"] + session + '_' + source_name
                    + '_' + self.proceedure + '_' + str_scan_range
                    + '.fits')
        # The output is large, so the pyfits part is also protected in a
        # process lest memory leaks kill us.
        if Block_list:
            writer = mp.Process(target=out_write,
                                args=(Block_list, out_file))
            writer.start()
            del Block_list
            writer.join()
def _svd_clean_pol(data, weights, freq_mask, pol, label, params, file_ind,
                   freq, time):
    """Subtract the leading SVD frequency modes from one polarization.

    Works in place on ``data[:, pol, :, :]``.  Returns True when the modes
    were subtracted and False when the polarization was skipped because
    too much of it is masked (a warning is issued in that case).
    """
    sh = data.shape
    data_svd = data[:, pol, :, :].reshape([-1, sh[-1]])[:, freq_mask[pol, :]]
    weight_svd = weights[:, pol, :, :].reshape(
        [-1, sh[-1]])[:, freq_mask[pol, :]]

    # check flag percent
    weight_svd = np.ma.array(weight_svd)
    weight_svd[weight_svd == 0] = np.ma.masked
    percent = float(np.ma.count_masked(weight_svd)) / weight_svd.size * 100
    print("Flag percent %s: %f%%" % (label, percent))
    # Skip when less than 10% of the samples carry weight or fewer than
    # 10 channels are selected.
    if (np.sum(weight_svd) < np.prod(weight_svd.shape) * 0.1
            or data_svd.shape[-1] < 10):
        msg = "WARNING: too much data masked for %s, no svd performed" % label
        warnings.warn(msg)
        return False

    vec_t, val, vec_f = linalg.svd(data_svd)
    vec_f = vec_f.T
    # Order the modes by decreasing singular value.
    sorted_index = np.argsort(val)[::-1]
    vec_t = vec_t[:, sorted_index]
    vec_f = vec_f[:, sorted_index]
    val = val[sorted_index]

    modes = params["modes"]
    amps = sp.empty((modes, data_svd.shape[0]))
    for i in np.arange(modes):
        # Weighted amplitude of mode i in every row, then subtract it.
        amp = sp.tensordot(vec_f[:, i], data_svd * weight_svd, axes=(0, 1))
        amp /= sp.tensordot(vec_f[:, i], vec_f[:, i][None, :] * weight_svd,
                            axes=(0, 1))
        data_svd -= vec_f[:, i][None, :] * amp[:, None]
        amps[i, :] = amp

    # Put the cleaned channels back; axis 2 keeps its original length.
    data[:, pol, :, :][..., freq_mask[pol, :]] = data_svd.reshape(
        [sh[0], sh[2], -1])

    if params["save_svd"]:
        f_name = (params["output_root"] + params["file_middles"][file_ind]
                  + "_svd_" + label + ".hdf5")
        utils.mkparents(f_name)
        f = h5py.File(f_name, "w")
        f["singular_values"] = val
        f["left_vectors"] = vec_t.T
        f["right_vectors"] = vec_f.T
        f["outmap_right"] = amps
        f["map_right"] = data[:, pol, :, :]
        f["freq_mask"] = freq_mask[pol, :]
        f["freq"] = freq
        f["time"] = time
        f.close()
    if params["save_plot"]:
        f_name = (params["output_root"] + params["file_middles"][file_ind]
                  + "_svd_" + label + ".hdf5")
        utils.mkparents(f_name)
        check_svd(f_name, [val, vec_t.T, vec_f.T], freq_mask[pol, :], freq)
        check_map(f_name, np.ma.array(data[:, pol, :, :]), time, freq)
    return True


def svd_spec_time(data, params, file_ind, freq=None, time=None):
    """Subtract SVD frequency modes from polarizations 0 (XX) and 3 (YY).

    Parameters
    ----------
    data : 4-D float array
        Modified in place and returned.  Presumably ordered
        (time, polarization, cal, frequency) -- TODO confirm.  Non-finite
        entries are treated as flagged.
    params : dict
        Uses "modes", "save_svd", "save_plot" and, when saving,
        "output_root" and "file_middles".
    file_ind : int
        Index into ``params["file_middles"]`` used to name saved files.
    freq, time : optional
        Only stored in the saved files / passed to the plotting helpers.

    Returns
    -------
    The `data` array, with flagged entries set to ``np.inf``.  If XX is
    too heavily masked nothing is subtracted and YY is not attempted; if
    only YY is too heavily masked the XX result is kept.
    """
    # NOTE(review): the boolean index below has trailing length-1 axes and
    # `freq_mask` then marks channels to be set to `np.ma.masked`; confirm
    # that this mask construction does what is intended.
    time_mask = np.logical_not(
        np.all(np.logical_not(np.isfinite(data)), axis=(2, 3)))
    freq_mask = np.all(np.isfinite(data[time_mask[..., None, None]]),
                       axis=(0, 2))
    data[freq_mask[None, :, None, :]] = np.ma.masked

    # Flagged samples get zero weight and are zeroed for the SVD; they are
    # restored to inf before returning.
    weights = np.ones(data.shape)
    data_mask = np.logical_not(np.isfinite(data))
    weights[data_mask] = 0.0
    data[data_mask] = 0.0

    for pol, label in ((0, "XX"), (3, "YY")):
        cleaned = _svd_clean_pol(data, weights, freq_mask, pol, label,
                                 params, file_ind, freq, time)
        if not cleaned:
            break
        gc.collect()

    data[data_mask] = np.inf
    return data
def calibrate_file(self, middle, gain, freq):
    """Divide the data of one file by the gain and write the result.

    Parameters
    ----------
    middle : str
        Middle part of the file name, combined with the input and output
        root/end parameters to build the file paths.
    gain : array
        Indexed as ``gain[ii, ...]`` where `ii` counts the bands being
        processed; the data of band `ii` are divided by that slice.  With
        ``params['diff_gain_cal_only']`` set, polarizations 0 and 3 are
        divided by their own gain and polarizations 1 and 2 by the
        geometric mean of the two.
    freq :
        Not used; the frequency axis is taken from the data itself.

    Raises
    ------
    NotImplementedError
        If no entry of `self.maps` has a frequency axis matching the
        data, if the map polarizations are not ordered like the data
        polarizations, or (for ``diff_gain_cal_only``) if the data
        polarizations are not (-5, -7, -8, -6).
    """
    # This function is largely cut and pasted from process file.
    params = self.params
    file_name = params['input_root'] + middle + params['input_end']
    # Output parameters.
    Writer = core.fitsGBT.Writer(feedback=self.feedback)
    out_filename = params['output_root'] + middle + params['output_end']
    Reader = core.fitsGBT.Reader(file_name, feedback=self.feedback)
    # An empty "IFs" parameter means "process every band in the file".
    band_inds = params["IFs"]
    if not band_inds:
        band_inds = range(len(Reader.IF_set))
    # Number of bands we actually process.
    n_bands_proc = len(band_inds)

    for ii in range(n_bands_proc):
        Blocks = Reader.read((), ii)
        Blocks[0].calc_freq()
        band_freq = Blocks[0].freq
        # We look for an exact match of the map frequencies.  This could
        # be made more general, but it will be fine for now.
        for band_maps in self.maps:
            maps_freq = band_maps[0].get_axis('freq')
            if np.allclose(maps_freq, band_freq):
                break
        else:
            raise NotImplementedError('No maps with frequency axis exactly'
                                      ' matching data.')
        # Check the polarization axis.  If several maps were passed, they
        # must be in the same order as the data polarizations.  A
        # separate index is used so the band index `ii` stays intact for
        # the gain lookup below.
        data_pols = Blocks[0].field['CRVAL4'].copy()
        if len(band_maps) != 1:
            for pol_ind in range(len(data_pols)):
                if (misc.polint2str(data_pols[pol_ind])
                        != self.params['map_polarizations'][pol_ind]):
                    msg = ('Map polarizations not in same order'
                           ' as data polarizations.')
                    raise NotImplementedError(msg)
        # Now process each block.
        for Data in Blocks:
            if params['diff_gain_cal_only']:
                if tuple(Data.field['CRVAL4']) != (-5, -7, -8, -6):
                    msg = ("Expected polarizations to be ordered "
                           "(XX, XY, YX, YY).")
                    raise NotImplementedError(msg)
                Data.data[:, 0, :, :] /= gain[ii, 0, :, :]
                Data.data[:, 3, :, :] /= gain[ii, 3, :, :]
                cross_gain = np.sqrt(gain[ii, 0, :, :] * gain[ii, 3, :, :])
                Data.data[:, 1, :, :] /= cross_gain
                Data.data[:, 2, :, :] /= cross_gain
            else:
                Data.data /= gain[ii, ...]
            Writer.add_data(Data)
    # Write the data back out.
    utils.mkparents(out_filename)
    Writer.write(out_filename)
def execute(self, n_processes=1):
    """Create the subtracted-output directory, then run the base execute.

    The regular output directory is left to `BaseSingle.execute`; only the
    extra ``subtracted_output_root`` location is prepared here.
    """
    subtracted_root = self.params['subtracted_output_root']
    utils.mkparents(subtracted_root)
    base_single.BaseSingle.execute(self, n_processes)
def corr_svd(data, params, file_ind, freq=None, time=None):
    """Subtract foreground modes from polarizations 0 (XX) and 3 (YY).

    For each of the two polarizations the frequency covariance between
    the entries 0 and 1 of axis 2 is computed, decomposed with
    `find_modes.get_freq_svd_modes`, and ``params["modes"]`` modes are
    removed with `subtract_foregrounds`.

    Parameters
    ----------
    data : 4-D array or masked array
        Converted to a masked array if it has no mask.  Presumably ordered
        (time, polarization, cal, frequency) -- TODO confirm.
    params : dict
        Uses "modes", "save_svd", "save_plot" and, when saving,
        "output_root" and "file_middles".
    file_ind : int
        Index into ``params["file_middles"]`` used to name saved files.
    freq, time : optional
        Only stored in the saved files / passed to the plotting helpers.

    Returns
    -------
    The masked data array.  It is returned early, after a warning, when
    everything is masked, when more than 80% is masked after the
    threshold cut, or when no frequency channel survives.
    """
    if not hasattr(data, "mask"):
        data = np.ma.array(data)
    not_finite = np.logical_not(np.isfinite(data))
    data.mask = np.logical_or(data.mask, not_finite)

    if np.all(data.mask[:, ::3, ...]):
        warnings.warn("WARNING: all data are masked, no svd performed")
        return data

    # Flag every (time, frequency) sample where polarization 0 or 3 falls
    # below the threshold, for all polarizations and cal states.
    threshold = 4
    bad_mask = np.any(data[:, ::3, ...] < threshold, axis=(1, 2))
    data.mask = np.logical_or(data.mask, bad_mask[:, None, None, :])

    auto_pols = data[:, ::3, ...]
    mask_perc = np.ma.count_masked(auto_pols) / float(auto_pols.size)
    if mask_perc > 0.8:
        msg = ("WARNING: %f%% data are masked, no svd performed") % (
            mask_perc * 100)
        warnings.warn(msg)
        return data

    # Fixed channel ranges that are always masked.
    for chans in (slice(None, 100), slice(-100, None),
                  slice(1640, 1740), slice(2066, 2166)):
        data[..., chans] = np.ma.masked

    # Times that are not fully masked in any polarization/cal, then the
    # channels with no masked sample among those times.
    fully_masked = np.all(data.mask, axis=-1)
    time_mask = np.logical_not(np.any(fully_masked, axis=(1, 2)))
    freq_mask = np.logical_not(
        np.any(data.mask[time_mask, ...], axis=(0, 2)))
    data.mask = np.logical_or(
        data.mask, np.logical_not(freq_mask[None, :, None, :]))

    if not np.any(freq_mask[::3, :]):
        warnings.warn("WARNING: all freq channels are masked, no svd performed")
        return data

    weights = np.ma.ones(data.shape)
    weights[data.mask] = np.ma.masked

    # The raw maps of both polarizations are plotted during the first
    # (XX) pass, before any mode is subtracted.
    for pol, label, plot_raw in ((0, "XX", True), (3, "YY", False)):
        if params["save_svd"]:
            map1_raw = copy.deepcopy(data[:, pol, 0, :].T)
            map2_raw = copy.deepcopy(data[:, pol, 1, :].T)

        if plot_raw and params["save_plot"]:
            for raw_pol, raw_label in ((0, "XX"), (3, "YY")):
                f_name = (params["output_root"]
                          + params["file_middles"][file_ind]
                          + "_raw_" + raw_label + ".hdf5")
                utils.mkparents(f_name)
                check_map(f_name, data[:, raw_pol, :, :], time, freq)

        corr, weight = find_modes.freq_covariance(
            data[:, pol, 0, :].T,
            data[:, pol, 1, :].T,
            weights[:, pol, 0, :].T,
            weights[:, pol, 1, :].T,
            freq_mask[pol, :],
            freq_mask[pol, :],
            no_weight=False,
        )
        svd_result = find_modes.get_freq_svd_modes(corr, corr.shape[0])
        map1, map2, outmap_left, outmap_right = subtract_foregrounds(
            svd_result,
            data[:, pol, 0, :].T,
            data[:, pol, 1, :].T,
            weights[:, pol, 0, :].T,
            weights[:, pol, 1, :].T,
            freq_mask[pol, :],
            0,
            params["modes"],
        )
        data[:, pol, 0, :] = map1.T
        data[:, pol, 1, :] = map2.T

        if params["save_svd"]:
            f_name = (params["output_root"]
                      + params["file_middles"][file_ind]
                      + "_svd_" + label + ".hdf5")
            utils.mkparents(f_name)
            f = h5py.File(f_name, "w")
            datasets = (
                ("singular_values", svd_result[0]),
                ("left_vectors", svd_result[1]),
                ("right_vectors", svd_result[2]),
                ("outmap_left", outmap_left),
                ("outmap_right", outmap_right),
                ("map_left", map1),
                ("map_right", map2),
                ("raw_left", map1_raw),
                ("raw_right", map2_raw),
                ("freq_mask", freq_mask[pol, :]),
                ("freq", freq),
                ("time", time),
            )
            for key, value in datasets:
                f[key] = value
            f.close()

        if params["save_plot"]:
            f_name = (params["output_root"]
                      + params["file_middles"][file_ind]
                      + "_svd_" + label + ".hdf5")
            utils.mkparents(f_name)
            check_svd(f_name, svd_result, freq_mask[pol, :], freq)
            check_map(f_name, np.ma.array(data[:, pol, :, :]), time, freq)
            f_name = (params["output_root"]
                      + params["file_middles"][file_ind]
                      + "_spec_" + label + ".hdf5")
            check_spec(f_name, np.ma.array(data[:, pol, :, :]), freq)

        # Free the large intermediates before the next polarization.
        del corr, weight, svd_result, map1, map2, outmap_left, outmap_right
        gc.collect()

    return data