def __parse_config(self, config):
    """Parse the configuration options

    Arguments
    ---------
    config: configparser.SectionProxy
    Parsed options to initialize class

    Raise
    -----
    DataError upon missing required variables
    """
    # instance variables
    self.blinding = config.get("blinding")
    if self.blinding is None:
        raise DataError("Missing argument 'blinding' required by DesiData")
    if self.blinding not in ACCEPTED_BLINDING_STRATEGIES:
        raise DataError(
            "Unrecognized blinding strategy. Accepted strategies "
            f"are {ACCEPTED_BLINDING_STRATEGIES}. "
            f"Found '{self.blinding}'")

    self.num_processors = config.getint("num processors")
    if self.num_processors is None:
        raise DataError(
            "Missing argument 'num processors' required by DesiData")
    if self.num_processors == 0:
        self.num_processors = multiprocessing.cpu_count() // 2

    self.use_non_coadded_spectra = config.getboolean(
        "use non-coadded spectra")
    if self.use_non_coadded_spectra is None:
        raise DataError(
            "Missing argument 'use non-coadded spectra' required by "
            "DesiData")
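
# Illustrative sketch (hypothetical section name and values): a minimal
# configparser section that satisfies the options parsed above. "none" is
# assumed to be one of the ACCEPTED_BLINDING_STRATEGIES; any accepted
# strategy would do.
def _example_desi_data_section():
    import configparser
    config = configparser.ConfigParser()
    config["data"] = {
        "blinding": "none",                # assumed accepted strategy
        "num processors": "0",             # 0 -> half the available cores
        "use non-coadded spectra": "False",
    }
    return config["data"]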
def read_file(self, filename, catalogue):
    """Read the spectra and format their data as Forest instances.

    Method to be implemented by child classes.

    Arguments
    ---------
    filename: str
    Name of the file to read

    catalogue: astropy.table.Table
    The quasar catalogue fragment associated with this file

    Return
    ------
    forests_by_targetid: dict
    Dictionary where forests are stored.

    num_data: int
    The number of instances loaded

    Raise
    -----
    DataError if the analysis type is PK 1D and resolution data is not present
    """
    raise DataError(
        "Function 'read_file' was not overloaded by child class")
def __init__(self, config):
    """Initialize class instance

    Arguments
    ---------
    config: configparser.SectionProxy
    Parsed options to initialize class

    Raise
    -----
    DataError if the selected reading mode is not supported
    """
    self.logger = logging.getLogger(__name__)

    # load variables from config
    self.mode = None
    self.__parse_config(config)

    super().__init__(config)

    # load DRQ Catalogue
    catalogue = DrqCatalogue(config).catalogue

    # read data
    if self.mode == "spplate":
        self.read_from_spplate(catalogue)
    elif self.mode == "spec":
        self.read_from_spec(catalogue)
    else:
        raise DataError(
            f"Error reading data in SdssData. Mode {self.mode} "
            "is not supported.")
def __parse_config(self, config):
    """Parse the configuration options

    Arguments
    ---------
    config: configparser.SectionProxy
    Parsed options to initialize class

    Raise
    -----
    DataError upon missing required variables
    """
    # instance variables
    self.mode = config.get("mode")
    if self.mode is None:
        raise DataError("Missing argument 'mode' required by SdssData")

    rebin = config.getint("rebin")
    if rebin is None:
        raise DataError("Missing argument 'rebin' required by SdssData")
    # SDSS spectra are natively sampled at 1e-4 in log10(lambda); fold
    # 'rebin' native pixels into the generic 'delta log lambda' option
    config["delta log lambda"] = str(rebin * 1e-4)
    del config["rebin"]

    config["wave solution"] = "log"
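
# Illustrative sketch (hypothetical section): how the 'rebin' option is
# folded into the generic 'delta log lambda' option consumed by the base
# Data class, given the native SDSS sampling of 1e-4 in log10(lambda).
def _example_sdss_rebin():
    import configparser
    config = configparser.ConfigParser()
    config["data"] = {"mode": "spec", "rebin": "3"}
    section = config["data"]
    rebin = section.getint("rebin")
    section["delta log lambda"] = str(rebin * 1e-4)  # ~3e-4 dex per pixel
    del section["rebin"]
    section["wave solution"] = "log"
    return dict(section)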
def read_data(self):
    """Read the spectra and format their data as Forest instances.

    Method to be implemented by child classes.

    Return
    ------
    is_mock: bool
    True if mocks are read, False otherwise

    is_sv: bool
    True if all the read data belong to SV. False otherwise

    Raise
    -----
    DataError if no quasars were found
    """
    raise DataError(
        "Function 'read_data' was not overloaded by child class")
def format_data(self,
                catalogue,
                spectrographs_data,
                targetid_spec,
                reso_from_truth=False):
    """After data has been read, format it into DesiForest instances

    Instances will be DesiForest or DesiPk1dForest depending on
    analysis_type

    Arguments
    ---------
    catalogue: astropy.table.Table
    The quasar catalogue fragment associated with this data

    spectrographs_data: dict
    The read data

    targetid_spec: int
    Targetid of the objects to format

    reso_from_truth: bool - Default: False
    Specifies whether resolution matrices are read from truth files (True)
    or directly from data (False)

    Return
    ------
    forests_by_targetid: dict
    Dictionary where forests are stored.

    num_data: int
    The number of instances loaded
    """
    num_data = 0
    forests_by_targetid = {}

    # Loop over quasars in catalogue fragment
    for row in catalogue:
        # Find which row in tile contains this quasar
        # It should be there by construction
        targetid = row["TARGETID"]
        w_t = np.where(targetid_spec == targetid)[0]
        if len(w_t) == 0:
            self.logger.warning(
                f"Error reading {targetid}. Ignoring object")
            continue
        if len(w_t) > 1:
            self.logger.warning(
                "Warning: more than one spectrum in this file "
                f"for {targetid}")
        else:
            w_t = w_t[0]

        # Construct DesiForest instance
        # Fluxes from the different spectrographs will be coadded
        for spec in spectrographs_data.values():
            if self.use_non_coadded_spectra:
                ivar = np.atleast_2d(spec['IVAR'][w_t])
                ivar_coadded_flux = np.atleast_2d(
                    ivar * spec['FLUX'][w_t]).sum(axis=0)
                ivar = ivar.sum(axis=0)
                flux = ivar_coadded_flux / ivar
            else:
                flux = spec['FLUX'][w_t].copy()
                ivar = spec['IVAR'][w_t].copy()

            args = {
                "flux": flux,
                "ivar": ivar,
                "targetid": targetid,
                "ra": row['RA'],
                "dec": row['DEC'],
                "z": row['Z'],
            }
            args["log_lambda"] = np.log10(spec['WAVELENGTH'])

            if self.analysis_type == "BAO 3D":
                forest = DesiForest(**args)
            elif self.analysis_type == "PK 1D":
                if self.use_non_coadded_spectra:
                    exposures_diff = exp_diff_desi(spec, w_t)
                    if exposures_diff is None:
                        continue
                else:
                    exposures_diff = np.zeros(spec['WAVELENGTH'].shape)
                if reso_from_truth:
                    reso_sum = spec['RESO'][:, :]
                else:
                    if len(spec['RESO'][w_t].shape) < 3:
                        reso_sum = spec['RESO'][w_t].copy()
                    else:
                        reso_sum = spec['RESO'][w_t].sum(axis=0)
                reso_in_pix, reso_in_km_per_s = spectral_resolution_desi(
                    reso_sum, spec['WAVELENGTH'])
                args["exposures_diff"] = exposures_diff
                args["reso"] = reso_in_km_per_s
                args["resolution_matrix"] = reso_sum
                args["reso_pix"] = reso_in_pix

                forest = DesiPk1dForest(**args)
            # this should never be entered; added here in case at some
            # point we add another analysis type
            else:  # pragma: no cover
                raise DataError(
                    "Unknown analysis type. Expected 'BAO 3D' "
                    f"or 'PK 1D'. Found '{self.analysis_type}'")

            # rebin arrays
            # this needs to happen after all arrays are initialized by
            # Forest constructor
            forest.rebin()

            # keep the forest
            if targetid in forests_by_targetid:
                existing_forest = forests_by_targetid[targetid]
                existing_forest.coadd(forest)
                forests_by_targetid[targetid] = existing_forest
            else:
                forests_by_targetid[targetid] = forest

            num_data += 1

    return forests_by_targetid, num_data
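
# Illustrative sketch (hypothetical helper, not part of the class): the
# inverse-variance coadd applied above when 'use non-coadded spectra' is
# set, i.e. flux = sum_i(ivar_i * flux_i) / sum_i(ivar_i) over repeated
# exposures. Toy numbers, not real DESI data.
def _example_coadd_exposures():
    import numpy as np
    flux = np.array([[1.0, 2.0], [3.0, 2.0]])  # two exposures, two pixels
    ivar = np.array([[4.0, 1.0], [1.0, 1.0]])  # per-exposure inverse variance
    coadd_ivar = ivar.sum(axis=0)                        # [5., 2.]
    coadd_flux = (ivar * flux).sum(axis=0) / coadd_ivar  # [1.4, 2.]
    return coadd_flux, coadd_ivar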
def read_data(self):
    """Read the spectra and format their data as Forest instances.

    Return
    ------
    is_mock: bool
    False as DESI data are not mocks

    is_sv: bool
    True if all the read data belong to SV. False otherwise

    Raise
    -----
    DataError if the analysis type is PK 1D and resolution data is not present
    DataError if no quasars were found
    """
    if np.any((self.catalogue['TILEID'] < 60000) &
              (self.catalogue['TILEID'] >= 1000)):
        is_sv = False
    else:
        is_sv = True

    coadd_name = "spectra" if self.use_non_coadded_spectra else "coadd"

    files_in = sorted(
        glob.glob(os.path.join(self.input_directory,
                               f"**/{coadd_name}-*.fits"),
                  recursive=True))

    if "cumulative" in self.input_directory:
        petal_tile_night = [
            f"{entry['PETAL_LOC']}-{entry['TILEID']}-thru{entry['LAST_NIGHT']}"
            for entry in self.catalogue
        ]
    else:
        petal_tile_night = [
            f"{entry['PETAL_LOC']}-{entry['TILEID']}-{entry['NIGHT']}"
            for entry in self.catalogue
        ]

    # this uniqueness check is to ensure each petal/tile/night combination
    # only appears once in the filelist
    petal_tile_night_unique = np.unique(petal_tile_night)

    filenames = []
    forests_by_targetid = {}
    for file_in in files_in:
        for ptn in petal_tile_night_unique:
            if ptn in os.path.basename(file_in):
                filenames.append(file_in)
    filenames = np.unique(filenames)

    num_data = 0
    if self.num_processors > 1:
        arguments = [(filename, self.catalogue) for filename in filenames]
        context = multiprocessing.get_context('fork')
        with context.Pool(processes=self.num_processors) as pool:
            imap_it = pool.imap(
                DesiTileFileHandler(self.analysis_type,
                                    self.use_non_coadded_spectra,
                                    self.logger, self.input_directory),
                arguments)
            for forests_by_targetid_aux, num_data_aux in imap_it:
                # Merge each dict to master forests_by_targetid
                merge_new_forest(forests_by_targetid,
                                 forests_by_targetid_aux)
                num_data += num_data_aux
    else:
        reader = DesiTileFileHandler(self.analysis_type,
                                     self.use_non_coadded_spectra,
                                     self.logger, self.input_directory)
        for index, filename in enumerate(filenames):
            forests_by_targetid_aux, num_data_aux = reader(
                (filename, self.catalogue))
            merge_new_forest(forests_by_targetid, forests_by_targetid_aux)
            num_data += num_data_aux
            self.logger.progress(
                f"read tile {index} of {len(filenames)}. ndata: {num_data}")

    self.logger.progress(f"Found {num_data} quasars in input files")

    if len(forests_by_targetid) == 0:
        raise DataError("No quasars found, stopping here")

    self.forests = list(forests_by_targetid.values())

    return False, is_sv
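
# Illustrative sketch (hypothetical values): how a catalogue row is turned
# into the petal-tile-night key matched against the coadd filenames above,
# here for a 'cumulative' input directory.
def _example_petal_tile_night_match():
    import os
    entry = {"PETAL_LOC": 3, "TILEID": 80605, "LAST_NIGHT": 20201215}
    key = f"{entry['PETAL_LOC']}-{entry['TILEID']}-thru{entry['LAST_NIGHT']}"
    filename = "tiles/cumulative/80605/20201215/coadd-3-80605-thru20201215.fits"
    return key in os.path.basename(filename)  # True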
def read_file(self, filename, catalogue):
    """Read the spectra and format their data as Forest instances.

    Arguments
    ---------
    filename: str
    Name of the file to read

    catalogue: astropy.table.Table
    The quasar catalogue fragment associated with this file

    Return
    ------
    forests_by_targetid: dict
    Dictionary where forests are stored.

    num_data: int
    The number of instances loaded

    Raise
    -----
    DataError if the analysis type is PK 1D and resolution data is not present
    """
    try:
        hdul = fitsio.FITS(filename)
    except IOError:
        self.logger.warning(
            f"Error reading file {filename}. Ignoring file")
        return {}, 0

    fibermap = hdul['FIBERMAP'].read()

    ra = fibermap['TARGET_RA']
    dec = fibermap['TARGET_DEC']
    tile_spec = fibermap['TILEID'][0]
    if "cumulative" in self.input_directory:
        night_spec = int(filename.split('thru')[-1].split('.')[0])
    else:
        night_spec = int(filename.split('-')[-1].split('.')[0])

    colors = ['B', 'R', 'Z']
    ra = np.radians(ra)
    dec = np.radians(dec)

    petal_spec = fibermap['PETAL_LOC'][0]

    spectrographs_data = {}
    for color in colors:
        try:
            spec = {}
            spec['WAVELENGTH'] = hdul[f'{color}_WAVELENGTH'].read()
            spec['FLUX'] = hdul[f'{color}_FLUX'].read()
            spec['IVAR'] = (hdul[f'{color}_IVAR'].read() *
                            (hdul[f'{color}_MASK'].read() == 0))
            if self.analysis_type == "PK 1D":
                if f"{color}_RESOLUTION" in hdul:
                    spec["RESO"] = hdul[f"{color}_RESOLUTION"].read()
                else:
                    raise DataError(
                        f"Error while reading {color} band from "
                        f"{filename}. Analysis type is 'PK 1D', "
                        "but file does not contain HDU "
                        f"'{color}_RESOLUTION'")
            w = np.isnan(spec['FLUX']) | np.isnan(spec['IVAR'])
            for key in ['FLUX', 'IVAR']:
                spec[key][w] = 0.
            spectrographs_data[color] = spec
        except OSError:
            self.logger.warning(
                f"Error while reading {color} band from {filename}. "
                "Ignoring color.")
    hdul.close()

    if "cumulative" in self.input_directory:
        select = ((catalogue['TILEID'] == tile_spec) &
                  (catalogue['PETAL_LOC'] == petal_spec) &
                  (catalogue['LAST_NIGHT'] == night_spec))
    else:
        select = ((catalogue['TILEID'] == tile_spec) &
                  (catalogue['PETAL_LOC'] == petal_spec) &
                  (catalogue['NIGHT'] == night_spec))

    self.logger.progress(
        f'This is tile {tile_spec}, petal {petal_spec}, night {night_spec}')

    forests_by_targetid, num_data = self.format_data(
        catalogue[select],
        spectrographs_data,
        fibermap["TARGETID"],
    )

    return forests_by_targetid, num_data
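
# Illustrative sketch of the masking convention used above: pixels flagged in
# the MASK HDU keep their place in the arrays but get zero inverse variance,
# and NaNs in flux/ivar are zeroed so downstream rebinning treats them as
# zero-weight pixels. Toy arrays, not real DESI data.
def _example_mask_to_zero_weight():
    import numpy as np
    flux = np.array([1.0, np.nan, 2.0])
    ivar = np.array([4.0, 1.0, 2.0])
    mask = np.array([0, 0, 1])      # nonzero entries flag bad pixels
    ivar = ivar * (mask == 0)       # masked pixel -> zero weight
    bad = np.isnan(flux) | np.isnan(ivar)
    flux[bad] = 0.0
    ivar[bad] = 0.0
    return flux, ivar               # [1., 0., 2.], [4., 0., 0.]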
def read_from_spec(self, catalogue):
    """Read the spectra and format their data as Forest instances.

    Arguments
    ---------
    catalogue: astropy.table.Table
    Table with the DRQ catalogue
    """
    self.logger.progress(f"Reading {len(catalogue)} objects")

    forests_by_thingid = {}
    #-- Loop over unique objects
    for row in catalogue:
        thingid = row['THING_ID']
        plate = row["PLATE"]
        mjd = row["MJD"]
        fiberid = row["FIBERID"]

        filename = (f"{self.input_directory}/{plate}/spec-{plate}-{mjd}-"
                    f"{fiberid:04d}.fits")
        try:
            hdul = fitsio.FITS(filename)
        except IOError:
            self.logger.warning(f"Error reading {filename}. Ignoring file")
            continue
        self.logger.progress(f"Read {filename}")

        log_lambda = np.array(hdul[1]["loglam"][:], dtype=np.float64)
        flux = np.array(hdul[1]["flux"][:], dtype=np.float64)
        # zero-weight pixels flagged in and_mask; note the parentheses:
        # '*' binds tighter than '=='
        ivar = (np.array(hdul[1]["ivar"][:], dtype=np.float64) *
                (hdul[1]["and_mask"][:] == 0))

        if self.analysis_type == "BAO 3D":
            forest = SdssForest(
                **{
                    "log_lambda": log_lambda,
                    "flux": flux,
                    "ivar": ivar,
                    "thingid": thingid,
                    "ra": row["RA"],
                    "dec": row["DEC"],
                    "z": row["Z"],
                    "plate": plate,
                    "mjd": mjd,
                    "fiberid": fiberid
                })
        elif self.analysis_type == "PK 1D":
            # compute difference between exposures
            exposures_diff = exp_diff(hdul, log_lambda)
            # compute spectral resolution
            wdisp = hdul[1]["wdisp"][:]
            reso = spectral_resolution(wdisp, True, fiberid, log_lambda)

            forest = SdssPk1dForest(
                **{
                    "log_lambda": log_lambda,
                    "flux": flux,
                    "ivar": ivar,
                    "thingid": thingid,
                    "ra": row["RA"],
                    "dec": row["DEC"],
                    "z": row["Z"],
                    "plate": plate,
                    "mjd": mjd,
                    "fiberid": fiberid,
                    "exposures_diff": exposures_diff,
                    "reso": reso,
                    "reso_pix": wdisp
                })
        else:
            raise DataError(
                "Invalid analysis type. Expected 'BAO 3D' or 'PK 1D'. "
                f"Found '{self.analysis_type}'")
        hdul.close()

        forest.rebin()

        if thingid in forests_by_thingid:
            forests_by_thingid[thingid].coadd(forest)
        else:
            forests_by_thingid[thingid] = forest

    self.forests = list(forests_by_thingid.values())
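
# Note on the and_mask line above: '*' binds tighter than '==', so without
# the inner parentheses the expression would evaluate to a boolean array
# ((ivar * and_mask) == 0) instead of an inverse variance with flagged
# pixels zero-weighted. A toy comparison of the two readings:
def _example_and_mask_precedence():
    import numpy as np
    ivar = np.array([2.0, 3.0])
    and_mask = np.array([0, 4])          # second pixel flagged
    right = ivar * (and_mask == 0)       # [2., 0.] -> usable weights
    wrong = (ivar * and_mask == 0)       # [True, False] -> booleans
    return right, wrong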
def __parse_config(self, config):
    """Parse the configuration options

    Arguments
    ---------
    config: configparser.SectionProxy
    Parsed options to initialize class

    Raise
    -----
    DataError upon missing required variables
    """
    # setup Forest class variables
    wave_solution = config.get("wave solution")
    if wave_solution is None:
        raise DataError(
            "Missing argument 'wave solution' required by Data")
    if wave_solution not in ["lin", "log"]:
        raise DataError(
            "Unrecognised value for 'wave solution'. Expected either "
            f"'lin' or 'log'. Found '{wave_solution}'.")

    if wave_solution == "log":
        pixel_step = config.getfloat("delta log lambda")
        if pixel_step is None:
            raise DataError(
                "Missing argument 'delta log lambda' required by "
                "Data when 'wave solution' is set to 'log'")
        pixel_step_rest_frame = config.getfloat(
            "delta log lambda rest frame")
        if pixel_step_rest_frame is None:
            pixel_step_rest_frame = pixel_step
            self.logger.info(
                "'delta log lambda rest frame' not set, using "
                "the same value as for 'delta log lambda' "
                f"({pixel_step_rest_frame})")
    elif wave_solution == "lin":
        pixel_step = config.getfloat("delta lambda")
        if pixel_step is None:
            raise DataError("Missing argument 'delta lambda' required by "
                            "Data when 'wave solution' is set to 'lin'")
        pixel_step_rest_frame = config.getfloat("delta lambda rest frame")
        if pixel_step_rest_frame is None:
            pixel_step_rest_frame = pixel_step
            self.logger.info(
                "'delta lambda rest frame' not set, using the same "
                f"value as for 'delta lambda' ({pixel_step_rest_frame})")
    # this should not be reached as wave_solution is either "lin" or "log";
    # added here only in case we add another wave_solution in the future
    else:  # pragma: no cover
        raise DataError(
            "Unrecognised value for 'wave solution'. Expected either "
            f"'lin' or 'log'. Found '{wave_solution}'.")

    lambda_max = config.getfloat("lambda max")
    if lambda_max is None:
        raise DataError("Missing argument 'lambda max' required by Data")
    lambda_max_rest_frame = config.getfloat("lambda max rest frame")
    if lambda_max_rest_frame is None:
        raise DataError(
            "Missing argument 'lambda max rest frame' required by Data")
    lambda_min = config.getfloat("lambda min")
    if lambda_min is None:
        raise DataError("Missing argument 'lambda min' required by Data")
    lambda_min_rest_frame = config.getfloat("lambda min rest frame")
    if lambda_min_rest_frame is None:
        raise DataError(
            "Missing argument 'lambda min rest frame' required by Data")

    Forest.set_class_variables(lambda_min, lambda_max,
                               lambda_min_rest_frame,
                               lambda_max_rest_frame, pixel_step,
                               pixel_step_rest_frame, wave_solution)

    # instance variables
    self.analysis_type = config.get("analysis type")
    if self.analysis_type is None:
        raise DataError(
            "Missing argument 'analysis type' required by Data")
    if self.analysis_type not in accepted_analysis_type:
        raise DataError("Invalid argument 'analysis type' required by "
                        f"Data. Found: '{self.analysis_type}'. Accepted "
                        "values: " + ",".join(accepted_analysis_type))

    if self.analysis_type == "PK 1D":
        lambda_abs_igm_name = config.get("lambda abs IGM")
        if lambda_abs_igm_name is None:
            raise DataError(
                "Missing argument 'lambda abs IGM' required by Data "
                "when 'analysis type' is 'PK 1D'")
        Pk1dForest.lambda_abs_igm = ABSORBER_IGM.get(lambda_abs_igm_name)
        if Pk1dForest.lambda_abs_igm is None:
            raise DataError(
                "Invalid argument 'lambda abs IGM' required by "
                f"Data. Found: '{lambda_abs_igm_name}'. Accepted "
                "values: " + ", ".join(ABSORBER_IGM))
Accepted " "values: " + ", ".join(ABSORBER_IGM)) self.input_directory = config.get("input directory") if self.input_directory is None: raise DataError( "Missing argument 'input directory' required by Data") self.min_num_pix = config.getint("minimum number pixels in forest") if self.min_num_pix is None: raise DataError( "Missing argument 'minimum number pixels in forest' " "required by Data") self.out_dir = config.get("out dir") if self.out_dir is None: raise DataError("Missing argument 'out dir' required by Data") self.rejection_log_file = config.get("rejection log file") if self.rejection_log_file is None: raise DataError( "Missing argument 'rejection log file' required by Data") if "/" in self.rejection_log_file: raise DataError("Error constructing Data. " "'rejection log file' should not incude folders. " f"Found: {self.rejection_log_file}") if not (self.rejection_log_file.endswith(".fits") or self.rejection_log_file.endswith(".fits.gz")): raise DataError("Error constructing Data. Invalid extension for " "'rejection log file'. Filename " "should en with '.fits' or '.fits.gz'. Found " f"'{self.rejection_log_file}'") if self.analysis_type == "BAO 3D": self.min_snr = config.getfloat("minimal snr bao3d") elif self.analysis_type == "PK 1D": self.min_snr = config.getfloat("minimal snr pk1d") # this should not be reached as analysis_type is either "BAO 3D" or # "PK 1D" added here only in case we add another analysis_type in the # future else: # pragma: no cover raise DataError("Invalid argument 'analysis type' required by " f"Data. Found: '{self.analysis_type}'. Accepted " "values: " + ",".join(accepted_analysis_type)) if self.min_snr is None: raise DataError( "Missing argument 'minimal snr bao3d' (if 'analysis type' = " "'BAO 3D') or ' minimal snr pk1d' (if 'analysis type' = 'Pk1d') " "required by Data")