def __init__(self, name, maptree, response, n_transits=None, **kwargs):
    """
    Store the plugin configuration; the LIFF instance is created later.

    :param name: name of this plugin instance (stored as a string)
    :param maptree: path to the map tree file
    :param response: path to the detector response file
    :param n_transits: number of transits, stored as a float (None if not given)
    :param kwargs: only the 'fullsky' key is read here
    :raise IOError: if the map tree or the response cannot be read
    """

    # 'fullsky' decides whether the whole map is loaded or just a small
    # disc around the source. The disc is the default because it is much
    # faster; LIFF then picks the ROI from the source model.
    self.fullsky = bool(kwargs['fullsky']) if 'fullsky' in kwargs else False

    self.name = str(name)

    # Expand variables and so on, then make both paths absolute
    self.maptree = os.path.abspath(sanitize_filename(maptree))
    self.response = os.path.abspath(sanitize_filename(response))

    # Both files must exist and be readable
    if not file_existing_and_readable(self.maptree):
        raise IOError("MapTree %s does not exist or is not readable" % maptree)

    if not file_existing_and_readable(self.response):
        raise IOError("Response %s does not exist or is not readable" % response)

    # The LIFF instance is built only once the likelihood model is known
    self.instanced = False

    # Number of transits
    self._n_transits = None if n_transits is None else float(n_transits)

    # Default channel range
    self.minChannel = int(defaultMinChannel)
    self.maxChannel = int(defaultMaxChannel)

    # The fit of the CommonNorm starts deactivated
    self.deactivateCommonNorm()

    # Stays None until the user defines a ROI
    self.roi_ra = None

    # Further setup
    self.__setup()
def test_sanatize():
    """sanitize_filename returns a Path, which is absolute when abspath=True."""

    plain = sanitize_filename("test.txt")

    assert isinstance(plain, Path)

    absolute = sanitize_filename("test.txt", abspath=True)

    assert absolute.is_absolute()
def get_basic_config(evfile, scfile, ra, dec, emin=100.0, emax=100000.0, zmax=100.0, evclass=128, evtype=3,
                     filter='DATA_QUAL>0 && LAT_CONFIG==1'):
    """
    Build a fermipy configuration starting from the packaged basic template.

    :param evfile: path to the event file (must exist)
    :param scfile: path to the spacecraft file (must exist)
    :param ra: R.A. of the selection, 0 <= ra <= 360
    :param dec: Dec of the selection, -90 <= dec <= 90
    :param emin: value stored as selection/emin (converted to float)
    :param emax: value stored as selection/emax (converted to float)
    :param zmax: zenith angle cut, 0 <= zmax <= 180
    :param evclass: event class, must be a power of 2
    :param evtype: event type (converted to int)
    :param filter: filter expression stored as selection/filter
    :return: the configuration wrapped in a DictWithPrettyPrint
    """

    from fermipy.config import ConfigManager

    # Start from the default configuration shipped with the package
    template_path = get_path_of_data_file("fermipy_basic_config.yml")
    basic_config = ConfigManager.load(template_path)  # type: dict

    # --- data section -------------------------------------------------
    evfile = sanitize_filename(evfile)
    scfile = sanitize_filename(scfile)

    assert os.path.exists(evfile), "The provided evfile %s does not exist" % evfile
    assert os.path.exists(scfile), "The provided scfile %s does not exist" % scfile

    data_section = basic_config['data']
    data_section['evfile'] = evfile
    data_section['scfile'] = scfile

    # --- selection section: position ----------------------------------
    ra = float(ra)
    dec = float(dec)

    assert 0 <= ra <= 360, "The provided R.A. (%s) is not valid. Should be 0 <= ra <= 360.0" % ra
    assert -90 <= dec <= 90, "The provided Dec (%s) is not valid. Should be -90 <= dec <= 90.0" % dec

    selection = basic_config['selection']
    selection['ra'] = ra
    selection['dec'] = dec

    # --- selection section: energy range ------------------------------
    emin = float(emin)
    emax = float(emax)

    selection['emin'] = emin
    selection['emax'] = emax

    # --- selection section: zenith cut --------------------------------
    zmax = float(zmax)

    assert 0.0 <= zmax <= 180.0, "The provided Zenith angle cut (zmax = %s) is not valid. " \
                                 "Should be 0 <= zmax <= 180.0" % zmax

    selection['zmax'] = zmax

    # --- selection section: event class / type / filter ---------------
    evclass = int(evclass)

    assert is_power_of_2(evclass), "The provided evclass is not a power of 2."

    selection['evclass'] = evclass
    selection['evtype'] = int(evtype)
    selection['filter'] = filter

    return DictWithPrettyPrint(basic_config)
def _read_arf_file(self, arf_file):
    """
    Read an ARF file and multiply it into the current matrix.

    The ARF must have one entry per Monte Carlo energy bin of the matrix, its
    energy bins must be contiguous, and its bin edges must agree with
    self.monte_carlo_energies to within 1% (in either direction).

    :param arf_file: path of the ancillary file; anything after a "{" is
        ignored when checking that the file exists
    :return: None (the matrix is replaced through self.replace_matrix)
    :raise IOError: if the ARF and the RMF are not compatible
    """

    arf_file = sanitize_filename(arf_file)

    self._arf_file = arf_file

    assert file_existing_and_readable(arf_file.split("{")[0]), "Ancillary file %s not existing or not " \
                                                                "readable" % arf_file

    with pyfits.open(arf_file) as f:

        data = f['SPECRESP'].data

        arf = data.field('SPECRESP')

        # Check that arf and rmf have same dimensions
        if arf.shape[0] != self.matrix.shape[1]:

            raise IOError(
                "The ARF and the RMF file does not have the same number of channels"
            )

        # Check that the ENERG_LO and ENERG_HI for the RMF and the ARF
        # are the same
        energ_lo = data.field("ENERG_LO")
        energ_hi = data.field("ENERG_HI")

    assert self._are_contiguous(
        energ_lo, energ_hi), "Monte carlo energies in ARF are not contiguous!"

    arf_mc_channels = np.append(energ_lo, [energ_hi[-1]])

    # Declare the mc channels different if they differ by more than 1%.
    # Only strictly positive energies are used, to avoid dividing by zero.
    idx = (self.monte_carlo_energies > 0)

    diff = old_div((self.monte_carlo_energies[idx] - arf_mc_channels[idx]),
                   self.monte_carlo_energies[idx])

    # The relative difference is signed: take its magnitude, otherwise an ARF
    # grid lying *above* the RMF grid would never be flagged
    if abs(diff).max() > 0.01:

        raise IOError(
            "The ARF and the RMF have one or more MC channels which differ by more than 1%"
        )

    # Multiply ARF and RMF
    matrix = self.matrix * arf

    # Override the matrix with the one multiplied by the arf
    self.replace_matrix(matrix)
def hawc_point_source_fitted_joint_like():
    """
    Fit the simulated HAWC point source with a cutoff power law.

    The data directory is read from the HAWC_3ML_TEST_DATA_DIR environment
    variable.

    :return: the JointLikelihood instance followed by the two objects
        returned by its fit() call
    """

    test_data_dir = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree = os.path.join(test_data_dir, _maptree_name)
    response = os.path.join(test_data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree, response)), \
        "Data files do not exist at %s" % test_data_dir

    # The simulated source has this spectrum (credits for simulation: Colas Riviere):
    #   CutOffPowerLaw,3.15e-11,2.37,42.3
    # at this position:
    #   100,22

    # Spectral and spatial model of the source
    shape = Cutoff_powerlaw()
    point_source = PointSource("TestSource", ra=100.0, dec=22.0, spectral_shape=shape)

    shape.K = 3.15e-11 / (u.TeV * u.cm**2 * u.s)
    shape.K.bounds = (1e-22, 1e-18)  # without units energies are in keV

    shape.piv = 1 * u.TeV
    shape.piv.fix = True

    shape.index = -2.37
    shape.index.bounds = (-4, -1)

    shape.xc = 42.3 * u.TeV
    shape.xc.bounds = (1 * u.TeV, 100 * u.TeV)

    flux_at_1_kev = point_source(1 * u.keV)

    assert np.isclose(flux_at_1_kev.value, 67.3458058177)

    # Likelihood model built from the source, and the HAWCLike plugin built
    # from the maptree and the detector response
    model = Model(point_source)

    hawc = HAWCLike("HAWC", maptree, response)
    hawc.set_active_measurements(1, 9)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")

    joint_likelihood = JointLikelihood(model, DataList(hawc), verbose=True)
    joint_likelihood.set_minimizer("ROOT")

    parameter_frame, like = joint_likelihood.fit(compute_covariance=False)

    return joint_likelihood, parameter_frame, like
def test_CommonNorm_fit():
    """Fit only the common normalization, with every spectral parameter fixed."""

    assert is_plugin_available("HAWCLike"), "HAWCLike is not available!"

    test_data_dir = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree = os.path.join(test_data_dir, _maptree_name)
    response = os.path.join(test_data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree, response)), \
        "Data files do not exist at %s" % test_data_dir

    # The simulated source has this spectrum (credits for simulation: Colas Riviere):
    #   CutOffPowerLaw,3.15e-11,2.37,42.3
    # at this position:
    #   100,22

    # Spectral and spatial model of the source
    shape = Cutoff_powerlaw()
    point_source = PointSource("TestSource", ra=100.0, dec=22.0, spectral_shape=shape)

    shape.K = 3.15e-11 / (u.TeV * u.cm ** 2 * u.s)
    shape.K.bounds = (1e-22, 1e-18)  # without units energies are in keV
    shape.K.fix = True

    shape.piv = 1 * u.TeV
    shape.piv.fix = True

    shape.index = -2.37
    shape.index.bounds = (-4, -1)
    shape.index.free = False

    shape.xc = 42.3 * u.TeV
    shape.xc.bounds = (1 * u.TeV, 100 * u.TeV)
    shape.xc.free = False

    flux_at_1_kev = point_source(1 * u.keV)

    assert np.isclose(flux_at_1_kev.value, 67.3458058177)

    # Likelihood model built from the source, and the HAWCLike plugin built
    # from the maptree and the detector response
    model = Model(point_source)

    hawc = HAWCLike("HAWC", maptree, response)
    hawc.set_active_measurements(1, 9)
    hawc.activate_CommonNorm()

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")

    joint_likelihood = JointLikelihood(model, DataList(hawc), verbose=True)
    joint_likelihood.set_minimizer("ROOT")

    _parameter_frame, _like = joint_likelihood.fit(compute_covariance=False)

    assert np.isclose(model.HAWC_ComNorm.value, 1.0756519971562115, rtol=1e-2)
def to_fits(self, filename: str, telescope_name: str, instrument_name: str, overwrite: bool = False) -> None:
    """
    Save the current matrix as an OGIP FITS file.

    :param filename: name of the FITS file to create
    :type filename: str
    :param telescope_name: name of the telescope/experiment this matrix applies to
    :param instrument_name: name of the instrument this matrix applies to
    :param overwrite: whether an existing output file may be overwritten
    :return: None
    """

    destination: Path = sanitize_filename(filename, abspath=True)

    rsp = RSP(
        self.monte_carlo_energies,
        self.ebounds,
        self.matrix,
        telescope_name,
        instrument_name,
    )

    rsp.writeto(destination, overwrite=overwrite)
def restore_fit(self, filename):
    """
    Reload a polynomial background fit from an HDF5 store.

    The store must hold a 'coefficients' and a 'covariance' entry; the
    storer of 'coefficients' carries a metadata attribute with the keys
    'poly_order', 'poly_selections', 'unbinned' and 'fit_method'.

    :param filename: path of the HDF5 file to read
    :return: None
    """

    store_path = sanitize_filename(filename)

    with HDFStore(store_path) as store:

        coefficients = store['coefficients']
        covariance = store['covariance']

        # Rebuild one polynomial per stored row
        self._polynomials = []

        for row in range(len(coefficients)):

            # pandas pads the coefficients that are not needed with NaNs:
            # keep only the finite ones, which preserves their order
            row_coeff = np.array(coefficients.loc[row])
            row_coeff = row_coeff[np.isfinite(row_coeff)]

            row_cov = covariance.loc[row]

            self._polynomials.append(
                Polynomial.from_previous_fit(row_coeff, row_cov))

        metadata = store.get_storer('coefficients').attrs.metadata

        self._optimal_polynomial_grade = metadata['poly_order']

        selections = np.array(metadata['poly_selections'])

        self._poly_intervals = TimeIntervalSet.from_starts_and_stops(
            selections[:, 0], selections[:, 1])

        self._unbinned = metadata['unbinned']

        self._fit_method_info['bin type'] = 'unbinned' if self._unbinned else 'binned'
        self._fit_method_info['fit method'] = metadata['fit_method']

    # go thru and count the counts!
    self._poly_fit_exists = True

    if self._time_selection_exists:

        # Re-apply the current time selection through the public setter
        active_intervals = self._time_intervals.to_string().split(',')

        self.set_active_time_intervals(*active_intervals)
def from_rsp2_file(cls, rsp2_file, exposure_getter, counts_getter, reference_time=0.0, half_shifted=True):
    """
    Build an InstrumentResponseSet from a file in the Fermi/GBM rsp2 format.

    :param rsp2_file: path of the rsp2 file
    :param exposure_getter: forwarded to InstrumentResponseSet
    :param counts_getter: forwarded to InstrumentResponseSet
    :param reference_time: forwarded to InstrumentResponseSet
    :param half_shifted: if True, shift the coverage interval of every
        matrix as explained in the body
    :return: an InstrumentResponseSet holding one OGIPResponse per matrix
    """

    # This assumes the Fermi/GBM rsp2 file format

    rsp_file = sanitize_filename(rsp2_file)

    assert file_existing_and_readable(rsp_file), "OGIPResponse file %s not existing or not readable" % rsp_file

    # The primary header says how many matrices the file contains
    with pyfits.open(rsp_file) as f:

        n_responses = f['PRIMARY'].header['DRM_NUM']

    # Read every matrix; they are addressed as "<file>{<number>}", counting from 1
    matrices = [OGIPResponse(rsp2_file + '{%i}' % number) for number in range(1, n_responses + 1)]

    if half_shifted and len(matrices) > 1:

        # The GBM format has a strange feature: a matrix does not cover from its TSTART to its TSTOP, but
        # from (TSTART + TSTOP) / 2.0 of the previous matrix to its own (TSTART + TSTOP) / 2.0.

        # The first matrix covers from its TSTART to its half time
        first = matrices[0]
        first._coverage_interval = TimeInterval(first.coverage_interval.start_time,
                                                first.coverage_interval.half_time)

        # Any other matrix starts where the previous one stops. The previous matrix has already been
        # processed at that point, so its stop time is already its original half time.
        for previous, current in zip(matrices, matrices[1:]):

            current._coverage_interval = TimeInterval(previous.coverage_interval.stop_time,
                                                      current.coverage_interval.half_time)

    return InstrumentResponseSet(matrices, exposure_getter, counts_getter, reference_time)
def _read_arf_file(self, arf_file):
    """
    Read an ARF file and multiply it into the current matrix.

    The ARF must have one entry per Monte Carlo energy bin of the matrix, its
    energy bins must be contiguous, and its bin edges must agree with
    self.monte_carlo_energies to within 1% (in either direction).

    :param arf_file: path of the ancillary file; anything after a "{" is
        ignored when checking that the file exists
    :return: None (the matrix is replaced through self.replace_matrix)
    :raise IOError: if the ARF and the RMF are not compatible
    """

    arf_file = sanitize_filename(arf_file)

    self._arf_file = arf_file

    assert file_existing_and_readable(arf_file.split("{")[0]), "Ancillary file %s not existing or not " \
                                                                "readable" % arf_file

    with pyfits.open(arf_file) as f:

        data = f['SPECRESP'].data

        arf = data.field('SPECRESP')

        # Check that arf and rmf have same dimensions
        if arf.shape[0] != self.matrix.shape[1]:

            raise IOError("The ARF and the RMF file does not have the same number of channels")

        # Check that the ENERG_LO and ENERG_HI for the RMF and the ARF
        # are the same
        energ_lo = data.field("ENERG_LO")
        energ_hi = data.field("ENERG_HI")

    assert self._are_contiguous(energ_lo, energ_hi), "Monte carlo energies in ARF are not contiguous!"

    arf_mc_channels = np.append(energ_lo, [energ_hi[-1]])

    # Declare the mc channels different if they differ by more than 1%.
    # Only strictly positive energies are used, to avoid dividing by zero.
    idx = (self.monte_carlo_energies > 0)

    diff = (self.monte_carlo_energies[idx] - arf_mc_channels[idx]) / self.monte_carlo_energies[idx]

    # The relative difference is signed: take its magnitude, otherwise an ARF
    # grid lying *above* the RMF grid would never be flagged
    if abs(diff).max() > 0.01:

        raise IOError("The ARF and the RMF have one or more MC channels which differ by more than 1%")

    # Multiply ARF and RMF
    matrix = self.matrix * arf

    # Override the matrix with the one multiplied by the arf
    self.replace_matrix(matrix)
def write_to(self, filename, overwrite=False):
    """
    Save these results into a FITS file.

    :param filename: name of the output file
    :param overwrite: forwarded to the FITS writer
    :return: None
    """

    AnalysisResultsFITS(self).writeto(sanitize_filename(filename), overwrite=overwrite)
def map_tree_factory(map_tree_file, roi):
    """
    Load a map tree, picking the reader from the file extension.

    :param map_tree_file: path of the map tree; a '.root' extension selects
        the ROOT reader, anything else is read as HDF5
    :param roi: forwarded to the reader
    :return: whatever the selected MapTree constructor returns
    """

    # Sanitize files in input (expand variables and so on)
    map_tree_file = sanitize_filename(map_tree_file)

    _, extension = os.path.splitext(map_tree_file)

    reader = MapTree.from_root_file if extension == '.root' else MapTree.from_hdf5

    return reader(map_tree_file, roi)
def test_set_active_measurements():
    """set_active_measurements accepts a (min, max) pair as well as a bin list."""

    test_data_dir = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree = os.path.join(test_data_dir, _maptree_name)
    response = os.path.join(test_data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree, response)), \
        "Data files do not exist at %s" % test_data_dir

    hawc = HAWCLike("HAWC", maptree, response)

    # First way: first and last bin
    hawc.set_active_measurements(1, 9)

    # Second way: explicit list of bin names
    hawc.set_active_measurements(bin_list=['4', '5', '6', '7', '8', '9'])
def test_set_active_measurements():
    """set_active_measurements accepts a (min, max) pair as well as a bin list."""

    test_data_dir = sanitize_filename(os.environ.get("HAWC_3ML_TEST_DATA_DIR"), abspath=True)

    maptree = os.path.join(test_data_dir, _maptree_name)
    response = os.path.join(test_data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree, response)), (
        "Data files do not exist at %s" % test_data_dir)

    hawc = HAWCLike("HAWC", maptree, response)

    # First way: first and last bin
    hawc.set_active_measurements(1, 9)

    # Second way: explicit list of bin names
    hawc.set_active_measurements(bin_list=["4", "5", "6", "7", "8", "9"])
def to_fits(self, filename, telescope_name, instrument_name, overwrite=False):
    """
    Write the current matrix into a OGIP FITS file

    :param filename : the name of the FITS file to be created
    :type filename : str
    :param telescope_name : a name for the telescope/experiment which this matrix applies to
    :param instrument_name : a name for the instrument which this matrix applies to
    :param overwrite: True or False, whether to overwrite or not the output file
    :return: None
    """

    filename = sanitize_filename(filename, abspath=True)

    fits_file = RSP(self.monte_carlo_energies, self.ebounds, self.matrix, telescope_name, instrument_name)

    # 'clobber' is the deprecated spelling of this option: use 'overwrite',
    # like the other FITS writers in this code base do
    fits_file.writeto(filename, overwrite=overwrite)
def write(self, filename):
    """
    Serialize the response to an HDF5 file.

    Three pandas objects are stored: '/dec_bins_definition' (the metadata of
    every response bin), '/effective_area' and '/psf'.

    :param filename: output file. WARNING: it will be overwritten if existing.
    :return: None
    """

    filename = sanitize_filename(filename)

    # Each dec bin is a (min, center, max) triplet: only the centers are needed
    _, center_decs, _ = zip(*self._dec_bins)

    # One entry per response bin: the (dec center, bin name) key and the
    # three pieces produced by the bin itself
    index_keys = []
    effarea_frames = []
    psf_frames = []
    meta_series = []

    # Go through the dec bins in increasing order of their center
    for dec_center in sorted(center_decs):

        bins_at_this_dec = self._response_bins[dec_center]

        for bin_id in bins_at_this_dec:

            response_bin = bins_at_this_dec[bin_id]

            effarea_frame, meta, psf_frame = response_bin.to_pandas()

            assert bin_id == response_bin.name, \
                'Bin name inconsistency: {} != {}'.format(bin_id, response_bin.name)

            index_keys.append((dec_center, response_bin.name))
            effarea_frames.append(effarea_frame)
            psf_frames.append(psf_frame)
            meta_series.append(pd.Series(meta))

    # Stack everything using the (dec center, bin name) pairs as multi-index
    effarea_df = pd.concat(effarea_frames, axis=0, keys=index_keys)
    psf_df = pd.concat(psf_frames, axis=0, keys=index_keys)
    meta_df = pd.concat(meta_series, axis=1, keys=index_keys).T

    # Now write the dataframes to file
    with Serialization(filename, mode='w') as serializer:

        serializer.store_pandas_object('/dec_bins_definition', meta_df)
        serializer.store_pandas_object('/effective_area', effarea_df)
        serializer.store_pandas_object('/psf', psf_df)
def hawc_response_factory(response_file_name):
    """
    Return the response stored in the given file, reading it at most once.

    Responses are cached in the module-level _instances dictionary, keyed by
    the sanitized absolute file name.

    :param response_file_name: path of the response file (.root, .hd5 or .hdf5)
    :return: an instance of HAWCResponse
    :raise NotImplementedError: if the extension is not one of the above
    """

    response_file_name = sanitize_filename(response_file_name, abspath=True)

    # Already read: hand back the cached instance
    if response_file_name in _instances:

        return _instances[response_file_name]  # type: HAWCResponse

    print("Creating singleton for %s" % response_file_name)

    # The extension tells which kind of response this is (ROOT or HDF)
    extension = os.path.splitext(response_file_name)[-1]

    if extension == ".root":

        new_instance = HAWCResponse.from_root_file(response_file_name)

    elif extension in ['.hd5', '.hdf5']:

        new_instance = HAWCResponse.from_hdf5(response_file_name)

    else:  # pragma: no cover

        raise NotImplementedError(
            "Extension %s for response file %s not recognized." %
            (extension, response_file_name))

    # Remember it for the next call
    _instances[response_file_name] = new_instance

    return new_instance  # type: HAWCResponse
def write_to(self, filename, overwrite=False):
    """
    Save this sequence of results into a FITS file.

    If the sequence was never characterized, it is first characterized as
    "unspecified", with a single 'VALUE' frame running over range(len(self)).

    :param filename: name for the output file
    :param overwrite: True or False
    :return: None
    """

    sequence_is_characterized = hasattr(self, "_sequence_name")

    if not sequence_is_characterized:

        # The user didn't say what this sequence is: fall back to the default
        default_frames = (('VALUE', range(len(self))), )

        self.characterize_sequence("unspecified", default_frames)

    fits_file = AnalysisResultsFITS(*self,
                                    sequence_tuple=self._sequence_tuple,
                                    sequence_name=self._sequence_name)

    fits_file.writeto(sanitize_filename(filename), overwrite=overwrite)
def write(self, outfile_name: str, overwrite: bool = True, force_rsp_write: bool = False) -> None:
    """
    Write a PHA Type II and BAK file for the given OGIP plugin. Automatically determines
    if BAK files should be generated.

    The two output names are "<outfile_name>.pha" and "<outfile_name>_bak.pha", in the same
    directory as outfile_name whether or not a ".pha" extension was given.

    :param outfile_name: string (excluding .pha) of the PHA to write
    :param overwrite: (optional) bool to overwrite existing file
    :param force_rsp_write: force the writing of an RSP
    :return:
    """

    outfile_name: Path = sanitize_filename(outfile_name)

    # Remove the .pha extension if any
    if outfile_name.suffix.lower() == ".pha":

        log.debug(f"stripping {outfile_name} of its suffix")

        # with_suffix("") drops only the extension. Path.stem would also drop
        # the parent directory, so the output would land in the current
        # working directory instead of where the user asked.
        outfile_name = outfile_name.with_suffix("")

    self._outfile_basename = outfile_name

    self._outfile_name = {
        "pha": Path(f"{outfile_name}.pha"),
        "bak": Path(f"{outfile_name}_bak.pha"),
    }

    self._out_rsp = []

    for ogip in self._ogiplike:

        self._append_ogip(ogip, force_rsp_write)

    self._write_phaII(overwrite)
def __init__(self, name, fermipy_config):
    """
    :param name: a name for this instance
    :param fermipy_config: either a path to a YAML configuration file or a dictionary containing the configuration
    (see http://fermipy.readthedocs.io/). Note that a dictionary is used as is: its 'model' and 'fileio'
    sections are removed/replaced and the entries of its 'data' section are rewritten in place.
    """

    # There are no nuisance parameters
    nuisance_parameters = {}

    super(FermipyLike, self).__init__(name, nuisance_parameters=nuisance_parameters)

    # Check whether the provided configuration is a file
    if not isinstance(fermipy_config, dict):

        # Assume this is a file name
        configuration_file = sanitize_filename(fermipy_config)

        # Test the sanitized path, which is the one opened below and reported
        # in the message (the raw input may contain variables to be expanded)
        if not os.path.exists(configuration_file):

            log.critical("Configuration file %s does not exist" % configuration_file)

        # Read the configuration
        with open(configuration_file) as f:

            self._configuration = yaml.load(f, Loader=yaml.SafeLoader)

    else:

        # Configuration is a dictionary. Nothing to do
        self._configuration = fermipy_config

    # If the user provided a 'model' key, issue a warning, as the model will be defined
    # later on and will overwrite the one contained in 'model'
    if "model" in self._configuration:

        custom_warnings.warn(
            "The provided configuration contains a 'model' section, which is useless as it "
            "will be overridden")

        self._configuration.pop("model")

    if "fileio" in self._configuration:

        custom_warnings.warn(
            "The provided configuration contains a 'fileio' section, which will be "
            "overwritten")

        self._configuration.pop("fileio")

    # Now check that the data exists

    # As minimum there must be a evfile and a scfile
    if "evfile" not in self._configuration["data"]:

        log.critical("You must provide a evfile in the data section")

    if "scfile" not in self._configuration["data"]:

        log.critical("You must provide a scfile in the data section")

    for datum in self._configuration["data"]:

        # Sanitize file name, as fermipy is not very good at handling relative paths or env. variables
        filename = str(
            sanitize_filename(self._configuration["data"][datum], True))

        self._configuration["data"][datum] = filename

        if not os.path.exists(self._configuration["data"][datum]):

            log.critical("File %s (%s) not found" % (filename, datum))

    # Prepare the 'fileio' part
    # Save all output in a directory with a unique name which depends on the configuration,
    # so that the same configuration will write in the same directory and fermipy will
    # know that it doesn't need to recompute things
    self._unique_id = "__%s" % _get_unique_tag_from_configuration(
        self._configuration)

    self._configuration["fileio"] = {"outdir": self._unique_id}

    # Ensure that there is a complete definition of a Region Of Interest (ROI)
    if not (("ra" in self._configuration["selection"]) and
            ("dec" in self._configuration["selection"])):

        log.critical(
            "You have to provide 'ra' and 'dec' in the 'selection' section of the configuration. Source name "
            "resolution, as well as Galactic coordinates, are not currently supported"
        )

    # This is empty at the beginning, will be instanced in the set_model method
    self._gta = None
def get_heasarc_table_as_pandas(heasarc_table_name, update=False, cache_time_days=1):
    """
    Obtain a a VO table from the HEASARC archives and return it as a pandas table indexed
    by object/trigger names. The heasarc_table_name values are the ones referenced at:

    https://heasarc.gsfc.nasa.gov/docs/archive/vo/

    In order to speed up the processing of the tables, 3ML can cache the XML table in a cache
    that is updated every cache_time_days. The cache can be forced to update, i.e, reload from
    the web, by setting update to True.

    :param heasarc_table_name: the name of a HEASARC browse table
    :param update: force web read of the table and update cache
    :param cache_time_days: number of days to hold the current cache
    :return: pandas DataFrame with the table, indexed by its 'name' column
    """

    # urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2
    try:

        from urllib.request import urlretrieve

    except ImportError:  # Python 2

        from urllib import urlretrieve

    # make sure the table is a string
    assert type(heasarc_table_name) is str

    # point to the cache directory and create it if it is not existing
    cache_directory = os.path.join(os.path.expanduser('~'), '.threeML', '.cache')

    if_directory_not_existing_then_make(cache_directory)

    cache_file = os.path.join(cache_directory, '%s_cache.yml' % heasarc_table_name)

    cache_file_sanatized = sanitize_filename(cache_file)

    # build and sanitize the votable XML file that will be saved
    file_name = os.path.join(cache_directory, '%s_votable.xml' % heasarc_table_name)

    file_name_sanatized = sanitize_filename(file_name)

    if not file_existing_and_readable(cache_file_sanatized):

        print("The cache for %s does not yet exist. We will try to build it\n" % heasarc_table_name)

        write_cache = True

    else:

        with open(cache_file_sanatized) as cache:

            # the cache file has two entries. The first is a datetime string that
            # specifies the last time the XML file was obtained
            yaml_cache = yaml.safe_load(cache)

        cached_time = astro_time.Time(datetime.datetime(*map(int, yaml_cache['last save'].split('-'))))

        # the second entry is how many seconds to keep the file around
        cache_valid_for = float(yaml_cache['cache time'])

        # now we will compare it to the current time in UTC
        current_time = astro_time.Time(datetime.datetime.utcnow(), scale='utc')

        delta_time = current_time - cached_time

        # an old file must be updated, a recent one is used as it is
        write_cache = delta_time.sec >= cache_valid_for

    if write_cache or update:

        print("Building cache for %s.\n" % heasarc_table_name)

        # go to HEASARC and get the requested table
        heasarc_url = 'http://heasarc.gsfc.nasa.gov/cgi-bin/W3Browse/getvotable.pl?name=%s' % heasarc_table_name

        try:

            urlretrieve(heasarc_url, filename=file_name_sanatized)

        except IOError:

            warnings.warn('The cache is outdated but the internet cannot be reached. Please check your connection')

        else:

            # Make sure the content is interpreted as Unicode (otherwise some characters will fail).
            # The file is read as bytes so that the decoding works on Python 2 and 3 alike.
            with open(file_name_sanatized, "rb") as table_file:

                decoded_table = table_file.read().decode("utf-8", errors="ignore")

            # now write the decoded text back to the file
            with codecs.open(file_name_sanatized, "w+", "utf-8") as table_file:

                table_file.write(decoded_table)

            # save the time that we got this table
            with open(cache_file_sanatized, 'w') as cache:

                yaml_dict = {}

                current_time = astro_time.Time(datetime.datetime.utcnow(), scale='utc')

                yaml_dict['last save'] = current_time.datetime.strftime('%Y-%m-%d-%H-%M-%S')

                seconds_in_day = 86400.

                yaml_dict['cache time'] = seconds_in_day * cache_time_days

                yaml.dump(yaml_dict, stream=cache, default_flow_style=False)

    # use astropy routines to read the votable
    with warnings.catch_warnings():

        warnings.simplefilter("ignore")

        vo_table = votable.parse(file_name_sanatized)

    table = vo_table.get_first_table().to_table(use_names_over_ids=True)

    # create a pandas table indexed by name
    pandas_df = table.to_pandas().set_index('name')

    del vo_table

    return pandas_df
def save_background(self, filename, overwrite=False):
    """
    Save the fitted background polynomials to an HDF5 file.

    The extension of filename, if any, is replaced with ".h5". The file holds the
    'coefficients' and 'covariance' of the polynomials, plus a metadata attribute on
    the 'coefficients' storer with the keys 'poly_order', 'poly_selections',
    'unbinned' and 'fit_method'.

    :param filename: name of the output file
    :param overwrite: if True, an existing file is removed first
    :return: None
    :raise IOError: if the file exists and overwrite is False, or it cannot be removed
    :raise RuntimeError: if the polynomials have not been fit yet
    """

    # make the file name proper
    filename = os.path.splitext(filename)

    filename = "%s.h5" % filename[0]

    filename_sanitized = sanitize_filename(filename)

    # Check that it does not exists
    if os.path.exists(filename_sanitized):

        if overwrite:

            try:

                os.remove(filename_sanitized)

            except OSError:

                raise IOError("The file %s already exists and cannot be removed (maybe you do not have "
                              "permissions to do so?). " % filename_sanitized)

        else:

            raise IOError("The file %s already exists!" % filename_sanitized)

    # Fail before opening the store, so that no empty file is left behind
    if not self._poly_fit_exists:

        raise RuntimeError('the polynomials have not been fit yet')

    with HDFStore(filename_sanitized) as store:

        # extract the polynomial information and save it
        df_coeff = pd.Series([poly.coefficients for poly in self._polynomials])

        df_err = pd.Series([poly.covariance_matrix for poly in self._polynomials])

        df_coeff.to_hdf(store, 'coefficients')
        df_err.to_hdf(store, 'covariance')

        # The selections are stored as a list of (start, stop) pairs: a lazy
        # zip object could not be turned back into a 2-d array when restoring
        store.get_storer('coefficients').attrs.metadata = {'poly_order': self._optimal_polynomial_grade,
                                                           'poly_selections': list(zip(
                                                               self._poly_intervals.start_times,
                                                               self._poly_intervals.stop_times)),
                                                           'unbinned': self._unbinned,
                                                           'fit_method': self._fit_method_info['fit method']}

    if self._verbose:

        print("\nSaved fitted background to %s.\n" % filename)
def test_hawc_fullsky_options():
    """
    Check the interplay between the fullsky option and set_ROI.

    With fullsky=True building the JointLikelihood must raise a RuntimeError
    until set_ROI is called; with fullsky=False set_ROI can still be used.
    """

    assert is_plugin_available("HAWCLike"), "HAWCLike is not available!"

    test_data_dir = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree = os.path.join(test_data_dir, _maptree_name)
    response = os.path.join(test_data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree, response)), \
        "Data files do not exist at %s" % test_data_dir

    # The simulated source has this spectrum (credits for simulation: Colas Riviere):
    #   CutOffPowerLaw,3.15e-11,2.37,42.3
    # at this position:
    #   100,22

    # Spectral and spatial model of the source
    shape = Cutoff_powerlaw()
    point_source = PointSource("TestSource", ra=100.0, dec=22.0, spectral_shape=shape)

    shape.K = 3.15e-11 / (u.TeV * u.cm ** 2 * u.s)
    shape.K.bounds = (1e-22, 1e-18)  # without units energies are in keV

    shape.piv = 1 * u.TeV
    shape.piv.fix = True

    shape.index = -2.37
    shape.index.bounds = (-4, -1)

    shape.xc = 42.3 * u.TeV
    shape.xc.bounds = (1 * u.TeV, 100 * u.TeV)

    flux_at_1_kev = point_source(1 * u.keV)

    assert np.isclose(flux_at_1_kev.value, 67.3458058177)

    # Likelihood model built from the source
    model = Model(point_source)

    # --- fullsky=True: no ROI yet, so the likelihood cannot be built ---
    hawc = HAWCLike("HAWC", maptree, response, fullsky=True)
    hawc.set_active_measurements(1, 9)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")

    data = DataList(hawc)

    with pytest.raises(RuntimeError):

        joint_likelihood = JointLikelihood(model, data, verbose=False)

    # Once a ROI is set, the same call must work
    hawc.set_ROI(100.0, 22.0, 2.0)

    joint_likelihood = JointLikelihood(model, data, verbose=False)

    # --- fullsky=False: set_ROI must be usable as well ---
    hawc = HAWCLike("HAWC", maptree, response, fullsky=False)
    hawc.set_active_measurements(1, 9)
    hawc.set_ROI(100.0, 22.0, 1.0)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    # Set up the likelihood
    print("Performing likelihood fit...\n")

    data = DataList(hawc)

    joint_likelihood = JointLikelihood(model, data, verbose=False)
def __init__(self, name, maptree, response, n_transits=None, fullsky=False):
    """
    Store the plugin configuration; the LIFF instance is created later.

    :param name: name of this plugin instance, also used as prefix of the
        "<name>_ComNorm" nuisance parameter
    :param maptree: path to the map tree file
    :param response: path to the detector response file
    :param n_transits: number of transits, stored as a float (None if not given)
    :param fullsky: if True load the entire map instead of a small disc
        around the source
    :raise IOError: if the map tree or the response cannot be read
    """

    # Loading just a small disc around the source is the default because it
    # is much faster than loading the entire map; LIFF picks the ROI from
    # the source model on its own
    self._fullsky = bool(fullsky)

    # Expand variables and so on, then make both paths absolute
    self._maptree = os.path.abspath(sanitize_filename(maptree))
    self._response = os.path.abspath(sanitize_filename(response))

    # Both files must exist and be readable
    if not file_existing_and_readable(self._maptree):
        raise IOError("MapTree %s does not exist or is not readable" % maptree)

    if not file_existing_and_readable(self._response):
        raise IOError("Response %s does not exist or is not readable" % response)

    # The LIFF instance is built only once the likelihood model is known
    self._instanced = False

    # Number of transits
    self._n_transits = None if n_transits is None else float(n_transits)

    # Default list of bins
    self._bin_list = self._min_and_max_to_list(defaultMinChannel, defaultMaxChannel)

    # NOTE: this flag is about the internal common norm minimization of LiFF
    # (off by default), not about the common norm as nuisance parameter,
    # which is controlled by activate_CommonNorm() and deactivate_CommonNorm()
    self._fit_commonNorm = False

    # These keep track of whether the user defined a ROI or not
    self._roi_ra = None
    self._roi_fits = None
    self._roi_galactic = False

    # The only nuisance parameter is the common norm, which starts fixed
    param_name = "%s_ComNorm" % name

    common_norm = Parameter(param_name, 1.0, min_value=0.5, max_value=1.5, delta=0.01)
    common_norm.fix = True

    self._nuisance_parameters = collections.OrderedDict()
    self._nuisance_parameters[param_name] = common_norm

    super(HAWCLike, self).__init__(name, self._nuisance_parameters)
def test_hawc_fullsky_options():
    """
    Exercise the fullsky option of HAWCLike: with fullsky=True building the
    JointLikelihood must fail until set_ROI is called, while with
    fullsky=False set_ROI can still be used.
    """

    assert is_plugin_available("HAWCLike"), "HAWCLike is not available!"

    data_dir = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree_file = os.path.join(data_dir, _maptree_name)
    response_file = os.path.join(data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree_file, response_file)), \
        "Data files do not exist at %s" % data_dir

    # The simulated source has this spectrum (credits for simulation: Colas Riviere):
    #   CutOffPowerLaw,3.15e-11,2.37,42.3
    # at this position:
    #   100,22

    # Spectral and spatial models for the source
    cutoff_pl = Cutoff_powerlaw()
    point_source = PointSource("TestSource", ra=100.0, dec=22.0, spectral_shape=cutoff_pl)

    cutoff_pl.K = 3.15e-11 / (u.TeV * u.cm**2 * u.s)
    # without units energies are in keV
    cutoff_pl.K.bounds = (1e-22, 1e-18)

    cutoff_pl.piv = 1 * u.TeV
    cutoff_pl.piv.fix = True

    cutoff_pl.index = -2.37
    cutoff_pl.index.bounds = (-4, -1)

    cutoff_pl.xc = 42.3 * u.TeV
    cutoff_pl.xc.bounds = (1 * u.TeV, 100 * u.TeV)

    # Sanity check on the flux of the source at 1 keV
    flux_at_1_keV = point_source(1 * u.keV)

    assert np.isclose(flux_at_1_keV.value, 67.3458058177)

    # The likelihood model is shared by both parts of the test
    model = Model(point_source)

    # Part 1: fullsky=True and no ROI, so setting up the likelihood must raise
    hawc = HAWCLike("HAWC", maptree_file, response_file, fullsky=True)
    hawc.set_active_measurements(1, 9)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    print("Performing likelihood fit...\n")

    data = DataList(hawc)

    with pytest.raises(RuntimeError):

        joint_like = JointLikelihood(model, data, verbose=False)

    # Once the ROI is defined the very same setup should work
    hawc.set_ROI(100.0, 22.0, 2.0)

    joint_like = JointLikelihood(model, data, verbose=False)

    # Part 2: set_ROI must be usable even though fullsky=False
    hawc = HAWCLike("HAWC", maptree_file, response_file, fullsky=False)
    hawc.set_active_measurements(1, 9)
    hawc.set_ROI(100.0, 22.0, 1.0)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    print("Performing likelihood fit...\n")

    data = DataList(hawc)

    joint_like = JointLikelihood(model, data, verbose=False)
def download_GBM_trigger_data(trigger_name, detectors=None, destination_directory='.', compress_tte=True):
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'], destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download: for each detector, the local
             paths stored under the keys 'cspec', 'rsp' and 'tte'
    :raise TriggerDoesNotExist: if the remote directory of the trigger cannot be found
    :raise KeyError: if the CSPEC, the RSP/RSP2 or the TTE file of a requested detector is
                     not in the remote directory
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # Create the output directory if it does not exist
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any)
    if detectors is not None:

        for det in detectors:

            assert det in _detector_list, "Detector %s in the provided list is not a valid detector. " \
                                          "Valid choices are: %s" % (det, _detector_list)

    else:

        detectors = list(_detector_list)

    # Open heasarc web page
    url = threeML_config['gbm']['public HTTP location']
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = '/triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:

        downloader = ApacheDirectory(heasarc_web_page_url)

    except RemoteDirectoryNotFound:

        raise TriggerDoesNotExist(
            "Trigger %s does not exist at %s" % (sanitized_trigger_name_, heasarc_web_page_url))

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:

            # Not a data file
            continue

        # The "map" is necessary to transform the tokens to normal string (instead of unicode),
        # because u"b0" != "b0" as a key for a dictionary
        _, file_type, detname, _, version_ext = map(str, tokens)

        # Everything after the first dot is the extension. Unlike a two-way unpacking of
        # split("."), this cannot fail on names with no dot or with several dots (for
        # instance "v00.fit.gz"): their extension is simply not among the known ones below
        ext = version_ext.partition(".")[2]

        # We do not care here about the other files (tcat, bcat and so on),
        # nor about files which pertain to other detectors
        if file_type not in ['cspec', 'tte'] or \
                ext not in ['rsp', 'rsp2', 'pha', 'fit'] or \
                detname not in detectors:

            continue

        # cspec files can be rsp, rsp2 or pha files. Classify them
        if file_type == 'cspec':

            if ext == 'rsp':

                remote_files_info[detname]['rsp'] = this_file

            elif ext == 'rsp2':

                remote_files_info[detname]['rsp2'] = this_file

            elif ext == 'pha':

                remote_files_info[detname]['cspec'] = this_file

            else:

                raise RuntimeError("Should never get here")

        else:

            remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint()) for det in detectors])

    for detector in list(remote_files_info.keys()):

        remote_detector_info = remote_files_info[detector]
        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info['cspec'] = downloader.download(
            remote_detector_info['cspec'], destination_directory, progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        if 'rsp2' in remote_detector_info:

            local_detector_info['rsp'] = downloader.download(
                remote_detector_info['rsp2'], destination_directory, progress=True)

        else:

            local_detector_info['rsp'] = downloader.download(
                remote_detector_info['rsp'], destination_directory, progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info['tte'] = downloader.download(
            remote_detector_info['tte'], destination_directory, progress=True, compress=compress_tte)

    return download_info
def __init__(self, rsp_file, arf_file=None):
    """
    Read a response matrix from an OGIP response file, optionally with an ancillary file.

    :param rsp_file: path of the response file. The syntax [responseFile]{[responseNumber]}
                     selects one response among those contained in a .rsp2 file (default: 1)
    :param arf_file: path of the ancillary response file. None or the string 'none'
                     (case insensitive) means that there is no ARF
    """

    # Now make sure that the response file exist
    rsp_file = sanitize_filename(rsp_file)

    assert file_existing_and_readable(rsp_file.split("{")[0]), "OGIPResponse file %s not existing or not " \
                                                                "readable" % rsp_file

    # Check if we are dealing with a .rsp2 file (containing more than
    # one response). This is checked by looking for the syntax
    # [responseFile]{[responseNumber]}
    if '{' in rsp_file:

        tokens = rsp_file.split("{")
        rsp_file = tokens[0]
        rsp_number = int(tokens[-1].split('}')[0].replace(" ", ""))

    else:

        rsp_number = 1

    self._rsp_file = rsp_file

    # Read the response
    with pyfits.open(rsp_file) as f:

        # Only the two lookups are guarded, so that an unrelated failure cannot be
        # mistaken for a missing MATRIX extension
        try:

            # This is usually when the response file contains only the energy dispersion
            data = f['MATRIX', rsp_number].data
            header = f['MATRIX', rsp_number].header

        except Exception as e:

            available_extensions = " ".join([repr(hdu.header.get('EXTNAME')) for hdu in f])

            warnings.warn("The default choice for MATRIX extension failed: " + repr(e) +
                          " available: " + available_extensions)

            # Other detectors might use the SPECRESP MATRIX name instead, usually when the response has been
            # already convoluted with the effective area

            # Note that here we are not catching any exception, because
            # we have to fail if we cannot read the matrix
            data = f['SPECRESP MATRIX', rsp_number].data
            header = f['SPECRESP MATRIX', rsp_number].header

        else:

            if arf_file is None:

                warnings.warn("The response is in an extension called MATRIX, which usually means you also "
                              "need an ancillary file (ARF) which you didn't provide. You should refer to the "
                              "documentation of the instrument and make sure you don't need an ARF.")

        # These 3 operations must be executed when the file is still open
        matrix = self._read_matrix(data, header)
        ebounds = self._read_ebounds(f['EBOUNDS'])
        mc_channels = self._read_mc_channels(data)

    # Now, if there is information on the coverage interval, let's use it
    header_start = header.get("TSTART", None)
    header_stop = header.get("TSTOP", None)

    if header_start is not None and header_stop is not None:

        super(OGIPResponse, self).__init__(matrix=matrix,
                                           ebounds=ebounds,
                                           monte_carlo_energies=mc_channels,
                                           coverage_interval=TimeInterval(header_start, header_stop))

    else:

        super(OGIPResponse, self).__init__(matrix=matrix,
                                           ebounds=ebounds,
                                           monte_carlo_energies=mc_channels)

    # Read the ARF if there is any
    # NOTE: this has to happen *after* calling the parent constructor
    if arf_file is not None and arf_file.lower() != 'none':

        self._read_arf_file(arf_file)

    else:

        self._arf_file = None
def save_background(self, filename, overwrite=False):
    """
    Save the fitted background polynomials to an HDF5 file.

    :param filename: name of the output file; its extension, if any, is replaced with .h5
    :param overwrite: if True an already existing file is removed first, otherwise it is an error
    :return: None
    :raise RuntimeError: if the polynomials have not been fit yet
    :raise IOError: if the file already exists and overwrite is False, or if it cannot be removed
    """

    # Check this before touching the disk: otherwise an existing file would be
    # removed and an empty store created even though there is nothing to save
    if not self._poly_fit_exists:

        raise RuntimeError('the polynomials have not been fit yet')

    # make the file name proper
    filename = "%s.h5" % os.path.splitext(filename)[0]

    filename_sanitized = sanitize_filename(filename)

    # Check that it does not exists
    if os.path.exists(filename_sanitized):

        if not overwrite:

            raise IOError("The file %s already exists!" % filename_sanitized)

        try:

            os.remove(filename_sanitized)

        except OSError:

            raise IOError(
                "The file %s already exists and cannot be removed (maybe you do not have "
                "permissions to do so?). " % filename_sanitized)

    # extract the polynomial information
    df_coeff = pd.Series([poly.coefficients for poly in self._polynomials])
    df_err = pd.Series([poly.covariance_matrix for poly in self._polynomials])

    with HDFStore(filename_sanitized) as store:

        df_coeff.to_hdf(store, 'coefficients')
        df_err.to_hdf(store, 'covariance')

        store.get_storer('coefficients').attrs.metadata = {
            'poly_order': self._optimal_polynomial_grade,
            # A list, not a bare zip: a zip object is a one-shot iterator
            'poly_selections': list(zip(self._poly_intervals.start_times,
                                        self._poly_intervals.stop_times)),
            'unbinned': self._unbinned,
            'fit_method': self._fit_method_info['fit method']
        }

    if self._verbose:

        print("\nSaved fitted background to %s.\n" % filename)
def __init__(self, name, maptree, response, n_transits=None, fullsky=False):
    """
    Set up the plugin state. The creation of the LIFF instance is postponed to
    when the likelihood model is available.

    :param name: plugin name; the nuisance parameter is called "<name>_ComNorm"
    :param maptree: map tree file (the path is sanitized and made absolute)
    :param response: detector response file (the path is sanitized and made absolute)
    :param n_transits: number of transits, converted to float (None leaves it unset)
    :param fullsky: True to load the entire map, False to load only a small disc around the source
    :raise IOError: if either input file does not exist or is not readable
    """

    # Entire map, or just a small disc around a source (faster, and the default).
    # LIFF will decide autonomously which ROI to use depending on the source model
    self._fullsky = bool(fullsky)

    # Sanitize files in input (expand variables and so on)
    maptree_path = os.path.abspath(sanitize_filename(maptree))
    response_path = os.path.abspath(sanitize_filename(response))

    self._maptree = maptree_path
    self._response = response_path

    # Check that they exist and can be read
    if not file_existing_and_readable(maptree_path):

        raise IOError("MapTree %s does not exist or is not readable" % maptree)

    if not file_existing_and_readable(response_path):

        raise IOError("Response %s does not exist or is not readable" % response)

    # The LIFF instance will be created when we have the likelihood model
    self._instanced = False

    # Number of transits
    if n_transits is None:

        self._n_transits = None

    else:

        self._n_transits = float(n_transits)

    # Default list of bins
    self._bin_list = self._min_and_max_to_list(defaultMinChannel, defaultMaxChannel)

    # By default the fit of the CommonNorm is deactivated
    # NOTE: this flag sets the internal common norm minimization of LiFF, not
    # the common norm as nuisance parameter (which is controlled by activate_CommonNorm()
    # and deactivate_CommonNorm())
    self._fit_commonNorm = False

    # This is to keep track of whether the user defined a ROI or not
    self._roi_ra = None
    self._roi_fits = None
    self._roi_galactic = False

    # Dictionary of nuisance parameters: only the common norm, kept fixed
    nuisance_name = "%s_ComNorm" % name

    self._nuisance_parameters = collections.OrderedDict()
    self._nuisance_parameters[nuisance_name] = Parameter(
        nuisance_name, 1.0, min_value=0.5, max_value=1.5, delta=0.01
    )
    self._nuisance_parameters[nuisance_name].fix = True

    super(HAWCLike, self).__init__(name, self._nuisance_parameters)
def download_LLE_trigger_data(trigger_name, destination_directory='.'):
    """
    Download the latest Fermi LAT LLE and RSP files from the HEASARC server. Will get the
    latest file versions. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiLATLLELike class.

    example usage: download_LLE_trigger_data('080916009', destination_directory='.')

    :param trigger_name: trigger number (str) with no leading letter e.g. '080916009'
    :param destination_directory: download directory
    :return: a dictionary with information about the download: the local path of each file,
             stored under its type ('rsp', 'cspec', 'ft2' or the type found in the file name)
    :raise TriggerDoesNotExist: if the remote directory of the trigger cannot be found
    """

    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # Create the output directory if it does not exist
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Figure out the directory on the server
    url = threeML_config['LAT']['public HTTP location']

    year = '20%s' % sanitized_trigger_name_[:2]
    directory = 'triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:

        downloader = ApacheDirectory(heasarc_web_page_url)

    except RemoteDirectoryNotFound:

        raise TriggerDoesNotExist(
            "Trigger %s does not exist at %s" % (sanitized_trigger_name_, heasarc_web_page_url))

    # Download only the lle, pt, cspec and rsp file (i.e., do not get all the png, pdf and so on)
    # (raw string: "\." is not a valid escape sequence in a normal string literal)
    pattern = r'gll_(lle|pt|cspec)_bn.+\.(fit|rsp|pha)'

    # The destination directory has already been sanitized above
    downloaded_files = downloader.download_all_files(
        destination_directory, progress=True, pattern=pattern)

    # Put the files in a structured dictionary
    download_info = DictWithPrettyPrint()

    for download in downloaded_files:

        basename = os.path.basename(download)

        file_type = _file_type_match.match(basename).group(1)

        if file_type == 'cspec':

            # a cspec file can be 2 things: a CSPEC spectral set (with .pha) extension,
            # or a response matrix (with a .rsp extension)
            ext = os.path.splitext(basename)[1]

            if ext == '.rsp':

                file_type = 'rsp'

            elif ext == '.pha':

                file_type = 'cspec'

            else:

                raise RuntimeError("Should never get here")

        # The pt file is really an ft2 file
        if file_type == 'pt':

            file_type = 'ft2'

        download_info[file_type] = download

    return download_info
def download(self, remote_filename, destination_path, new_filename=None, progress=True, compress=False):
    """
    Download one file of this remote directory into a local directory.

    :param remote_filename: name of the file, which must be listed in self.files
    :param destination_path: existing local directory where the file is written
    :param new_filename: local name of the file (default: same name as on the server)
    :param progress: if True a progress bar is shown during the download
    :param compress: if True the file is written through gzip and ".gz" is appended to its name
    :return: the path of the local file
    """

    assert remote_filename in self.files, "File %s is not contained in this directory (%s)" % (remote_filename,
                                                                                           self._request_result.url)

    destination_path = sanitize_filename(destination_path, abspath=True)

    # NOTE: the message needs a %s placeholder, otherwise formatting it raises a TypeError
    # which hides the actual problem
    assert path_exists_and_is_directory(destination_path), "Provided destination %s does not exist or " \
                                                           "is not a directory" % destination_path

    # If no filename is specified, use the same name that the file has on the remote server
    if new_filename is None:

        new_filename = remote_filename.split("/")[-1]

    # Get the fully qualified path for the remote and the local file
    remote_path = self._request_result.url + remote_filename
    local_path = os.path.join(destination_path, new_filename)

    # Ask the server for the file, but do not download it just yet
    # (stream=True will get the HTTP header but nothing else)
    # Use stream=True for two reasons:
    # * so that the file is not downloaded all in memory before being written to the disk
    # * so that we can report progress is requested
    this_request = requests.get(remote_path, stream=True)

    # From here on the request is closed on every exit path, including the early return
    # for a file which is already there and any error raised while writing
    try:

        # Figure out the size of the file
        file_size = int(this_request.headers['Content-Length'])

        # Now check if we really need to download this file
        if compress:

            # Add a .gz at the end of the file path
            local_path += '.gz'

        if file_existing_and_readable(local_path):

            local_size = os.path.getsize(local_path)

            # if the compressed file already exists it will have a smaller size,
            # so in that case the size is not compared
            if local_size == file_size or compress:

                # No need to download it again
                return local_path

        # Chunk size shouldn't be too small otherwise we are causing a bottleneck in the download speed
        chunk_size = 1024 * 10

        # If the user wants to compress the file, use gzip, otherwise the normal opener
        if compress:

            import gzip

            opener = gzip.open

        else:

            opener = open

        if progress:

            # Set a title for the progress bar
            bar_title = "Downloading %s" % new_filename

            with progress_bar(file_size, scale=1024 * 1024, units='Mb',
                              title=bar_title) as bar:  # type: ProgressBarBase

                with opener(local_path, 'wb') as f:

                    for chunk in this_request.iter_content(chunk_size=chunk_size):

                        if chunk:  # filter out keep-alive new chunks

                            f.write(chunk)
                            bar.increase(len(chunk))

        else:

            with opener(local_path, 'wb') as f:

                for chunk in this_request.iter_content(chunk_size=chunk_size):

                    if chunk:  # filter out keep-alive new chunks

                        f.write(chunk)

    finally:

        this_request.close()

    return local_path
def get_basic_config(
    evfile,
    scfile,
    ra,
    dec,
    emin=100.0,
    emax=100000.0,
    zmax=100.0,
    evclass=128,
    evtype=3,
    filter="DATA_QUAL>0 && LAT_CONFIG==1",
    fermipy_verbosity=2,
    fermitools_chatter=2,
):
    """
    Build a basic fermipy configuration, starting from the default one shipped
    in fermipy_basic_config.yml.

    Problems with the inputs are reported with log.critical; no exception is
    raised by the checks themselves.

    :param evfile: event file
    :param scfile: spacecraft file, also used to read TSTART and TSTOP from its primary header
    :param ra: R.A. of the selection (0 <= ra <= 360)
    :param dec: Dec of the selection (-90 <= dec <= 90)
    :param emin: minimum energy of the selection
    :param emax: maximum energy of the selection
    :param zmax: zenith angle cut (0 <= zmax <= 180)
    :param evclass: event class (expected to be a power of 2)
    :param evtype: event type
    :param filter: filter expression of the selection
    :param fermipy_verbosity: stored as "verbosity" in the "logging" section
    :param fermitools_chatter: stored as "chatter" in the "logging" section
    :return: the configuration, as a DictWithPrettyPrint
    """

    from fermipy.config import ConfigManager

    # Get default config from fermipy
    basic_config = ConfigManager.load(
        get_path_of_data_file("fermipy_basic_config.yml")
    )  # type: dict

    # Input files
    evfile = str(sanitize_filename(evfile))
    scfile = str(sanitize_filename(scfile))

    input_files = (
        ("The provided evfile %s does not exist", evfile),
        ("The provided scfile %s does not exist", scfile),
    )

    for message, path in input_files:

        if not os.path.exists(path):

            log.critical(message % path)

    basic_config["data"]["evfile"] = evfile
    basic_config["data"]["scfile"] = scfile

    # Position
    ra = float(ra)
    dec = float(dec)

    if not 0 <= ra <= 360:

        log.critical(
            "The provided R.A. (%s) is not valid. Should be 0 <= ra <= 360.0" % ra)

    if not -90 <= dec <= 90:

        log.critical(
            "The provided Dec (%s) is not valid. Should be -90 <= dec <= 90.0" % dec)

    selection = basic_config["selection"]

    selection["ra"] = ra
    selection["dec"] = dec

    # Energy range
    emin = float(emin)
    emax = float(emax)

    selection["emin"] = emin
    selection["emax"] = emax

    # Zenith cut
    zmax = float(zmax)

    if not 0.0 <= zmax <= 180.0:

        log.critical(
            "The provided Zenith angle cut (zmax = %s) is not valid. "
            "Should be 0 <= zmax <= 180.0" % zmax)

    selection["zmax"] = zmax

    # Time range, from the primary header of the spacecraft file
    with fits.open(scfile) as spacecraft_file:

        primary_header = spacecraft_file[0].header

        tmin = float(primary_header["TSTART"])
        tmax = float(primary_header["TSTOP"])

    selection["tmin"] = tmin
    selection["tmax"] = tmax

    # Event class and type
    evclass = int(evclass)

    if not is_power_of_2(evclass):

        log.critical("The provided evclass is not a power of 2.")

    selection["evclass"] = evclass

    evtype = int(evtype)

    selection["evtype"] = evtype

    selection["filter"] = filter

    # Logging
    logging_section = basic_config["logging"]

    # (In fermipy convention, 0 = critical only, 1 also errors, 2 also warnings,
    # 3 also info, 4 also debug)
    logging_section["verbosity"] = fermipy_verbosity

    # 0 = no screen output. 2 = some output, 4 = lot of output.
    logging_section["chatter"] = fermitools_chatter

    return DictWithPrettyPrint(basic_config)
def _get_fermipy_instance(configuration, likelihood_model):
    """
    Build and set up a GTAnalysis instance whose 'model' section reflects the
    point sources of the provided likelihood model. The spectrum of every
    point source is replaced with a FileFunction evaluated from the model.

    NOTE: the configuration is modified in place (its "model" section is set,
    and the IRFs are stored under "gtlike" if they were not specified).

    :param configuration: a dictionary containing the configuration for fermipy
    :param likelihood_model: the input likelihood model from astromodels
    :type likelihood_model: astromodels.Model
    :return: a tuple (GTAnalysis instance, energies in keV at which the sources are evaluated);
             the energies are None if the model contains no point source
    :raise NotImplementedError: if the likelihood model contains extended sources
    """

    selection = configuration["selection"]

    # Center and radius of the ROI
    ra_center = float(selection["ra"])
    dec_center = float(selection["dec"])

    roi_width = float(configuration["binning"]["roiwidth"])
    roi_radius = old_div(roi_width, np.sqrt(2.0))

    # IRFs corresponding to the event class
    irfs = evclass_irf[int(configuration["selection"]["evclass"])]

    log.info(f"Using IRFs {irfs}")

    # If the user specified the IRFs they must agree with the event class,
    # otherwise use the ones we have just found
    if "gtlike" not in configuration:

        configuration["gtlike"] = {}

    if "irfs" in configuration["gtlike"]:

        user_irfs = configuration["gtlike"]["irfs"]

        if irfs.upper() != user_irfs.upper():

            log.critical(
                "Evclass points to IRFS %s, while you specified %s in the "
                "configuration" % (irfs, configuration["gtlike"]["irfs"]))

    else:

        configuration["gtlike"]["irfs"] = irfs

    # Find Galactic and Isotropic templates appropriate for this IRFS
    # (information on the ROI is used to cut the Galactic template, which speeds up the
    # analysis a lot)
    # NOTE: these are going to be absolute paths
    galactic_file = findGalacticTemplate(irfs, ra_center, dec_center, roi_radius)  # noqa: F821
    galactic_template = str(sanitize_filename(galactic_file, True))

    isotropic_file = findIsotropicTemplate(irfs)  # noqa: F821
    isotropic_template = str(sanitize_filename(isotropic_file, True))

    # The fermipy model is just a dictionary. It corresponds to the 'model' section
    # of the configuration file (http://fermipy.readthedocs.io/en/latest/config.html#model)
    fermipy_model = {
        "galdiff": galactic_template,
        "isodiff": isotropic_template,
    }

    # One entry for each point source of the likelihood model. The spectrum used
    # here is inconsequential, as it will be substituted by a FileFunction later
    # on, so a PowerLaw is used for everything
    point_sources = list(likelihood_model.point_sources.values())

    sources = []

    for src in point_sources:  # type: astromodels.PointSource

        sources.append({
            "Index": 2.56233,
            "Scale": 572.78,
            "Prefactor": 2.4090e-12,
            "name": src.name,
            "ra": src.position.ra.value,
            "dec": src.position.dec.value,
            "SpectrumType": "PowerLaw",
        })

    # extended sources
    for _ in list(likelihood_model.extended_sources.values()):  # type: astromodels.ExtendedSource

        raise NotImplementedError("Extended sources are not supported yet")

    # Add all sources to the model
    fermipy_model["sources"] = sources

    # Now we can finally create the GTAnalysis instance
    configuration["model"] = fermipy_model

    gta = GTAnalysis(configuration)  # noqa: F821

    # This will take a long time if it's the first time we run with this model
    gta.setup()

    # Substitute all spectra for point sources with FileSpectrum, so that we will be able
    # to control them from 3ML
    energies_keV = None

    for src in list(likelihood_model.point_sources.values()):  # type: astromodels.PointSource

        src_name = src.name

        # Fix this source, so fermipy will not optimize by itself the parameters
        gta.free_source(src_name, False)

        # This will substitute the current spectrum with a FileFunction with the same shape and flux
        gta.set_source_spectrum(src_name, "FileFunction", update_source=False)

        # Get the energies at which to evaluate this source
        # (fermipy energies are in GeV, we need keV)
        log_energies, _ = gta.get_source_dnde(src_name)
        current_energies_keV = 10**log_energies * 1e3

        if energies_keV is None:

            energies_keV = current_energies_keV

        elif not np.all(energies_keV == current_energies_keV):

            # All sources are expected to be evaluated at the same energies
            log.critical(
                "All sources should be evaluated at the same energies.")

        dnde = src(energies_keV)  # ph / (cm2 s keV)
        dnde_per_MeV = dnde * 1000.0  # ph / (cm2 s MeV)

        gta.set_source_dnde(src_name, dnde_per_MeV, False)

    # Same for extended source
    for _ in list(likelihood_model.extended_sources.values()):  # type: astromodels.ExtendedSource

        raise NotImplementedError("Extended sources are not supported yet")

    return gta, energies_keV
def download_GBM_trigger_data(trigger_name, detectors=None, destination_directory='.', compress_tte=True):
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'], destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download: for each detector, the local
             paths stored under the keys 'cspec', 'rsp' and 'tte'
    :raise TriggerDoesNotExist: if the remote directory of the trigger cannot be found
    :raise KeyError: if the CSPEC, the RSP/RSP2 or the TTE file of a requested detector is
                     not in the remote directory
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # Create the output directory if it does not exist
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any)
    if detectors is not None:

        for det in detectors:

            assert det in _detector_list, "Detector %s in the provided list is not a valid detector. " \
                                          "Valid choices are: %s" % (det, _detector_list)

    else:

        detectors = list(_detector_list)

    # Open heasarc web page
    url = threeML_config['gbm']['public HTTP location']
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = '/triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:

        downloader = ApacheDirectory(heasarc_web_page_url)

    except RemoteDirectoryNotFound:

        raise TriggerDoesNotExist("Trigger %s does not exist at %s" % (sanitized_trigger_name_,
                                                                       heasarc_web_page_url))

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:

            # Not a data file
            continue

        # The "map" is necessary to transform the tokens to normal string (instead of unicode),
        # because u"b0" != "b0" as a key for a dictionary
        _, file_type, detname, _, version_ext = map(str, tokens)

        # Take whatever follows the first dot as extension. A two-way unpacking of
        # split(".") would raise ValueError for names without a dot or with more than one
        # (for instance "v00.fit.gz"); here such names just fail the extension test below
        ext = version_ext.partition(".")[2]

        # We do not care here about the other files (tcat, bcat and so on),
        # nor about files which pertain to other detectors
        if file_type not in ['cspec', 'tte'] or \
                ext not in ['rsp', 'rsp2', 'pha', 'fit'] or \
                detname not in detectors:

            continue

        # cspec files can be rsp, rsp2 or pha files. Classify them
        if file_type == 'cspec':

            if ext == 'rsp':

                remote_files_info[detname]['rsp'] = this_file

            elif ext == 'rsp2':

                remote_files_info[detname]['rsp2'] = this_file

            elif ext == 'pha':

                remote_files_info[detname]['cspec'] = this_file

            else:

                raise RuntimeError("Should never get here")

        else:

            remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint()) for det in detectors])

    for detector in remote_files_info.keys():

        remote_detector_info = remote_files_info[detector]
        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info['cspec'] = downloader.download(remote_detector_info['cspec'],
                                                           destination_directory,
                                                           progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        if 'rsp2' in remote_detector_info:

            local_detector_info['rsp'] = downloader.download(remote_detector_info['rsp2'],
                                                             destination_directory,
                                                             progress=True)

        else:

            local_detector_info['rsp'] = downloader.download(remote_detector_info['rsp'],
                                                             destination_directory,
                                                             progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info['tte'] = downloader.download(remote_detector_info['tte'],
                                                         destination_directory,
                                                         progress=True,
                                                         compress=compress_tte)

    return download_info
def test_hawc_extended_source_fit():
    """
    Fit a simulated extended source (disk shape, cutoff power law spectrum)
    with HAWCLike and check the best fit parameters, the likelihood value,
    the TS and the differential flux at 1 TeV against reference values.
    """

    # Ensure test environment is valid
    assert is_plugin_available("HAWCLike"), "HAWCLike is not available!"

    data_dir = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree_file = os.path.join(data_dir, _maptree_name)
    response_file = os.path.join(data_dir, _response_name)

    assert all(os.path.exists(path) for path in (maptree_file, response_file)), \
        "Data files do not exist at %s" % data_dir

    # The simulated source has this spectrum (credits for simulation: Colas Riviere):
    #   CutOffPowerLaw,1.32e-07,2.37,42.3
    # at this position:
    #   100,22
    # with a disk shape with an extension of 1.5 deg

    # Spectral and spatial models for the source
    cutoff_pl = Cutoff_powerlaw()
    disk = Disk_on_sphere()

    extended_source = ExtendedSource("ExtSource", spatial_shape=disk, spectral_shape=cutoff_pl)

    # The position is fixed. The fix flag of the radius is not touched here
    disk.lon0 = 100.0
    disk.lon0.fix = True

    disk.lat0 = 22.0
    disk.lat0.fix = True

    disk.radius = 1.5 * u.degree
    disk.radius.bounds = (0.5 * u.degree, 1.55 * u.degree)

    # Of the spectral parameters only the cutoff energy is explicitly fixed here
    cutoff_pl.K = 4.39964273e-20
    cutoff_pl.K.bounds = (1e-24, 1e-17)

    cutoff_pl.piv = 1 * u.TeV

    cutoff_pl.index = -2.37
    cutoff_pl.index.bounds = (-4, -1)

    cutoff_pl.xc = 42.3 * u.TeV
    cutoff_pl.xc.bounds = (1 * u.TeV, 100 * u.TeV)
    cutoff_pl.xc.fix = True

    # Likelihood model and HAWCLike plugin (maptree + detector response)
    model = Model(extended_source)

    hawc = HAWCLike("HAWC", maptree_file, response_file)
    hawc.set_active_measurements(1, 9)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(model)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")

    data = DataList(hawc)

    joint_like = JointLikelihood(model, data, verbose=True)
    joint_like.set_minimizer("ROOT")

    parameter_frame, like = joint_like.fit(compute_covariance=False)

    # Check that we have converged to the right solution
    # (the true value of course are not exactly the value simulated,
    # they are just the point where the fit should converge)
    expected_best_fit = (
        (4.7805737823025172e-20, 'ExtSource.spectrum.main.Cutoff_powerlaw.K'),
        (-2.44931279819, 'ExtSource.spectrum.main.Cutoff_powerlaw.index'),
        (1.4273457159139373, 'ExtSource.Disk_on_sphere.radius'),
    )

    for expected_value, parameter_path in expected_best_fit:

        assert is_within_tolerance(expected_value, parameter_frame['value'][parameter_path])

    assert is_within_tolerance(186389.581117, like['-log(likelihood)']['HAWC'])

    # Test statistic and significance
    print("\nTest statistic:")

    TS = hawc.calc_TS()
    sigma = np.sqrt(TS)

    assert is_within_tolerance(3510.26, TS)
    assert is_within_tolerance(59.2475, sigma)

    print("Test statistic: %g" % TS)
    print("Significance:   %g\n" % sigma)

    # Differential flux at 1 TeV, converted to 1 / (TeV cm2 s)
    diff_flux = cutoff_pl(1 * u.TeV)
    diff_flux_TeV = diff_flux.to(1 / (u.TeV * u.cm**2 * u.s))

    print("Norm @ 1 TeV:  %s \n" % diff_flux_TeV)

    assert is_within_tolerance(4.66888328668e-11, diff_flux_TeV.value)

    cutoff_pl.display()
    disk.display()
def __init__(self, name, phafile, bkgfile, rspfile, arffile=None):
    """
    Load an observed spectrum, its background spectrum and the response,
    then prepare the count arrays and the nuisance parameter.

    :param name: name of this dataset (stored as ``self.name``)
    :param phafile: observed spectrum file; a trailing ``{#}`` specification
        (as in ``spectrum.pha{3}``) is stripped before the file is checked
    :param bkgfile: background spectrum file (same ``{#}`` handling)
    :param rspfile: response file (same ``{#}`` handling)
    :param arffile: optional ancillary response file (same ``{#}`` handling)
    :raise IOError: if any of the provided files does not exist or is not readable
    """
    self.name = name

    # Check that all files exist.
    # The file could contain a {#} specification, like spectrum.pha{3},
    # which indicates the 3rd spectrum in the spectrum.pha file
    # NOTE(review): the {#} part is dropped here and never forwarded to
    # OGIPPHA/Response, so the spectrum selection looks lost -- confirm intent.
    checked_files = []

    for candidate in (phafile, bkgfile, rspfile):

        sanitized = file_utils.sanitize_filename(candidate.split("{")[0])

        if not file_utils.file_existing_and_readable(sanitized):
            raise IOError("File %s does not exist or is not readable" % (sanitized))

        checked_files.append(sanitized)

    phafile, bkgfile, rspfile = checked_files

    # Check the arf, if provided
    if arffile is not None:

        arffile = file_utils.sanitize_filename(arffile.split("{")[0])

        if not file_utils.file_existing_and_readable(arffile):
            raise IOError("File %s does not exist or is not readable" % (arffile))

    self.phafile = OGIPPHA(phafile, filetype="observed")
    self.exposure = self.phafile.getExposure()
    self.bkgfile = OGIPPHA(bkgfile, filetype="background")
    self.response = Response(rspfile, arffile)

    # Start with a mask selecting every channel (the user will overwrite it
    # using the setActiveMeasurement method). The builtin bool is used as
    # dtype because the numpy.bool alias is not available in every NumPy release.
    self.mask = numpy.ones(self.phafile.getRates().shape, dtype=bool)

    # Get the counts for this spectrum
    self.counts = self.phafile.getRates()[self.mask] * self.exposure

    # Negative observed counts are clamped to zero (with a warning)
    idx = self.counts < 0

    if numpy.sum(idx) > 0:
        warnings.warn(
            "The observed spectrum for %s " % self.name + "has negative channels! Fixing those to zero.",
            RuntimeWarning,
        )
        self.counts[idx] = 0

    # Get the background counts for this spectrum
    self.bkgCounts = self.bkgfile.getRates()[self.mask] * self.exposure

    # Negative background counts are clamped to zero (with a warning)
    idx = self.bkgCounts < 0

    if numpy.sum(idx) > 0:
        warnings.warn(
            "The background spectrum for %s " % self.name + "has negative channels! Fixing those to zero.",
            RuntimeWarning,
        )
        self.bkgCounts[idx] = 0

    # Keep a copy which will never be modified
    self.counts_backup = numpy.array(self.counts, copy=True)
    self.bkgCounts_backup = numpy.array(self.bkgCounts, copy=True)

    # Effective area correction is disabled by default, i.e.,
    # the nuisance parameter is fixed to 1
    self.nuisanceParameters = {}
    self.nuisanceParameters["InterCalib"] = Parameter("InterCalib", 1, min_value=0.9, max_value=1.1, delta=0.01)
    self.nuisanceParameters["InterCalib"].fix = True
def restore_fit(self, filename):
    """
    Rebuild the polynomial fit from a file previously written to an HDFStore.

    The polynomials, the fit metadata (polynomial order, selections, binned
    or unbinned flag, fit method) and the counts/exposure accumulated over
    the polynomial intervals are all restored on this instance.

    :param filename: name of the file holding the saved fit
    """
    with HDFStore(sanitize_filename(filename)) as store:

        coefficients = store['coefficients']
        covariance = store['covariance']

        # One polynomial per stored row of coefficients
        self._polynomials = []

        for row in range(len(coefficients)):

            stored_coeff = np.array(coefficients.loc[row])

            # pandas stores the non-needed coefficients as NaNs:
            # keep only the finite ones so we get the right order
            finite_coeff = stored_coeff[np.isfinite(stored_coeff)]

            this_cov = covariance.loc[row]

            self._polynomials.append(Polynomial.from_previous_fit(finite_coeff, this_cov))

        metadata = store.get_storer('coefficients').attrs.metadata

        self._optimal_polynomial_grade = metadata['poly_order']

        selections = np.array(metadata['poly_selections'])
        starts = selections[:, 0]
        stops = selections[:, 1]

        self._poly_intervals = TimeIntervalSet.from_starts_and_stops(starts, stops)

        self._unbinned = metadata['unbinned']

        self._fit_method_info['bin type'] = 'unbinned' if self._unbinned else 'binned'

        self._fit_method_info['fit method'] = metadata['fit_method']

    self._poly_fit_exists = True

    # Collect the polynomial exposure and counts over every polynomial
    # interval, so that they can be extracted if needed
    self._poly_exposure = 0.
    self._poly_selected_counts = []

    for interval in self._poly_intervals:

        start, stop = interval.start_time, interval.stop_time

        self._poly_selected_counts.append(self.count_per_channel_over_interval(start, stop))
        self._poly_exposure += self.exposure_over_interval(start, stop)

    self._poly_selected_counts = np.sum(self._poly_selected_counts, axis=0)

    # Re-apply the active time selection, if there is one
    if self._time_selection_exists:

        selection_strings = self._time_intervals.to_string().split(',')

        self.set_active_time_intervals(*selection_strings)
def get_heasarc_table_as_pandas(heasarc_table_name, update=False, cache_time_days=1):
    """
    Obtain a VO table from the HEASARC archives and return it as a pandas table indexed
    by object/trigger names. The heasarc_table_name values are the ones referenced at:

    https://heasarc.gsfc.nasa.gov/docs/archive/vo/

    In order to speed up the processing of the tables, 3ML can cache the XML table in a cache
    that is updated every cache_time_days. The cache can be forced to update, i.e., reload from
    the web, by setting update to True.

    :param heasarc_table_name: the name of a HEASARC browse table
    :param update: force web read of the table and update cache
    :param cache_time_days: number of days to hold the current cache
    :return: pandas DataFrame indexed by the "name" column of the table
    """

    # make sure the table name is a string (str subclasses are accepted as well)
    assert isinstance(heasarc_table_name, str)

    # point to the cache directory and create it if it is not existing
    cache_directory = os.path.join(os.path.expanduser("~"), ".threeML", ".cache")

    if_directory_not_existing_then_make(cache_directory)

    cache_file = os.path.join(cache_directory, "%s_cache.yml" % heasarc_table_name)

    cache_file_sanatized = sanitize_filename(cache_file)

    # build and sanitize the votable XML file that will be saved
    file_name = os.path.join(cache_directory, "%s_votable.xml" % heasarc_table_name)

    file_name_sanatized = sanitize_filename(file_name)

    if not file_existing_and_readable(cache_file_sanatized):

        print(
            "The cache for %s does not yet exist. We will try to build it\n"
            % heasarc_table_name
        )

        write_cache = True

    else:

        with open(cache_file_sanatized) as cache:

            # the cache file has two entries. The first is a datetime string that
            # specifies the last time the XML file was obtained
            yaml_cache = yaml.load(cache, Loader=yaml.SafeLoader)

        cached_time = astro_time.Time(
            datetime.datetime(*map(int, yaml_cache["last save"].split("-")))
        )

        # the second entry is how many seconds to keep the file around
        cache_valid_for = float(yaml_cache["cache time"])

        # now we will compare it to the current time in UTC
        current_time = astro_time.Time(datetime.datetime.utcnow(), scale="utc")

        delta_time = current_time - cached_time

        # an old file must be refreshed, a recent one is used as it is
        write_cache = delta_time.sec >= cache_valid_for

    if write_cache or update:

        print("Building cache for %s.\n" % heasarc_table_name)

        # go to HEASARC and get the requested table
        heasarc_url = (
            "http://heasarc.gsfc.nasa.gov/cgi-bin/W3Browse/getvotable.pl?name=%s"
            % heasarc_table_name
        )

        try:

            urllib.request.urlretrieve(heasarc_url, filename=file_name_sanatized)

        except IOError:

            # Best effort: fall through and try to read whatever votable file
            # is already on disk
            warnings.warn(
                "The cache is outdated but the internet cannot be reached. Please check your connection"
            )

        else:

            # Make sure the lines are interpreted as Unicode (otherwise some characters will fail)
            with open(file_name_sanatized) as table_file:

                new_lines = table_file.readlines()

            # now write the decoded lines back to the file
            with codecs.open(file_name_sanatized, "w+", "utf-8") as table_file:

                table_file.write("".join(new_lines))

            # save the time at which we got this table, together with the
            # validity of the cache expressed in seconds
            with open(cache_file_sanatized, "w") as cache:

                current_time = astro_time.Time(datetime.datetime.utcnow(), scale="utc")

                seconds_in_day = 86400.0

                yaml_dict = {
                    "last save": current_time.datetime.strftime("%Y-%m-%d-%H-%M-%S"),
                    "cache time": seconds_in_day * cache_time_days,
                }

                yaml.dump(yaml_dict, stream=cache, default_flow_style=False)

    # use astropy routines to read the votable
    with warnings.catch_warnings():

        warnings.simplefilter("ignore")

        vo_table = votable.parse(file_name_sanatized)

    table = vo_table.get_first_table().to_table(use_names_over_ids=True)

    # make sure we do not use this as byte code
    table.convert_bytestring_to_unicode()

    # create a pandas table indexed by name
    pandas_df = table.to_pandas().set_index("name")

    del vo_table

    return pandas_df
def test_hawc_extended_source_fit():
    """
    Run the HAWCLike extended-source fit on the simulated dataset and verify
    best-fit parameters, likelihood, test statistic and flux at 1 TeV.
    """
    # Ensure test environment is valid
    assert is_plugin_available("HAWCLike"), "HAWCLike is not available!"

    data_path = sanitize_filename(os.environ.get('HAWC_3ML_TEST_DATA_DIR'), abspath=True)

    maptree, response = (os.path.join(data_path, file_name)
                         for file_name in (_maptree_name, _response_name))

    both_files_exist = os.path.exists(maptree) and os.path.exists(response)
    assert both_files_exist, "Data files do not exist at %s" % data_path

    # The simulated source has this spectrum (credits for simulation: Colas Riviere):
    #   CutOffPowerLaw,1.32e-07,2.37,42.3
    # at this position:
    #   100,22
    # with a disk shape with an extension of 1.5 deg

    # Define the spectral and spatial models for the source
    spectral_model = Cutoff_powerlaw()
    spatial_model = Disk_on_sphere()

    source = ExtendedSource("ExtSource",
                            spatial_shape=spatial_model,
                            spectral_shape=spectral_model)

    # Spatial part: fixed center, free radius
    spatial_model.lon0 = 100.0
    spatial_model.lon0.fix = True
    spatial_model.lat0 = 22.0
    spatial_model.lat0.fix = True
    spatial_model.radius = 1.5 * u.degree
    spatial_model.radius.bounds = (0.5 * u.degree, 1.55 * u.degree)

    # Spectral part: free normalization and index, fixed cutoff
    spectral_model.K = 4.39964273e-20
    spectral_model.K.bounds = (1e-24, 1e-17)
    spectral_model.piv = 1 * u.TeV
    spectral_model.index = -2.37
    spectral_model.index.bounds = (-4, -1)
    spectral_model.xc = 42.3 * u.TeV
    spectral_model.xc.bounds = (1 * u.TeV, 100 * u.TeV)
    spectral_model.xc.fix = True

    # Set up a likelihood model using the source.
    # Then create a HAWCLike object using the maptree and detector response.
    likelihood_model = Model(source)

    plugin = HAWCLike("HAWC", maptree, response)
    plugin.set_active_measurements(1, 9)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(likelihood_model)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")

    data = DataList(plugin)

    fitter = JointLikelihood(likelihood_model, data, verbose=True)
    fitter.set_minimizer("ROOT")

    parameter_frame, like = fitter.fit(compute_covariance=False)

    # Check that we have converged to the right solution
    # (the reference values are not exactly the simulated ones,
    # they are just the point where the fit should converge)
    reference_parameters = (
        (4.7805737823025172e-20, 'ExtSource.spectrum.main.Cutoff_powerlaw.K'),
        (-2.44931279819, 'ExtSource.spectrum.main.Cutoff_powerlaw.index'),
        (1.4273457159139373, 'ExtSource.Disk_on_sphere.radius'),
    )

    for reference, parameter_path in reference_parameters:
        assert is_within_tolerance(reference, parameter_frame['value'][parameter_path])

    assert is_within_tolerance(186389.581117, like['-log(likelihood)']['HAWC'])

    # Print the TS and the significance
    print("\nTest statistic:")

    TS = plugin.calc_TS()
    sigma = np.sqrt(TS)

    assert is_within_tolerance(3510.26, TS)
    assert is_within_tolerance(59.2475, sigma)

    print("Test statistic: %g" % TS)
    print("Significance:   %g\n" % sigma)

    # Get the differential flux at 1 TeV
    flux_at_1_TeV = spectral_model(1 * u.TeV)

    # Convert it to 1 / (TeV cm2 s)
    target_unit = 1 / (u.TeV * u.cm ** 2 * u.s)
    diff_flux_TeV = flux_at_1_TeV.to(target_unit)

    print("Norm @ 1 TeV:  %s \n" % diff_flux_TeV)

    assert is_within_tolerance(4.66888328668e-11, diff_flux_TeV.value)

    spectral_model.display()
    spatial_model.display()
def download_LAT_data(ra, dec, radius, tstart, tstop, time_type, data_type='Photon', destination_directory="."):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than what you will need in the
    analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
    Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the path to the downloaded FT1 and FT2 file
    :raise RuntimeError: if the server cannot be reached, answers with an unexpected page, or the query does not
    complete before the timeout
    """
    # Python 3 keeps these helpers in submodules which must be imported explicitly
    import urllib.parse
    import urllib.request

    _known_time_types = ['MET', 'Gregorian', 'MJD']

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(_known_time_types)

    valid_classes = ['Photon', 'Extended']
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(valid_classes)

    assert radius > 0, "Radius of the Region of Interest must be > 0"

    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if not os.path.exists(destination_directory):

        os.makedirs(destination_directory)

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config['LAT']['query form']

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters['coordfield'] = "%.4f,%.4f" % (ra, dec)
    query_parameters['coordsystem'] = "J2000"
    query_parameters['shapefield'] = "%s" % radius
    query_parameters['timefield'] = "%s,%s" % (tstart, tstop)
    query_parameters['timetype'] = "%s" % time_type
    query_parameters['energyfield'] = "30,1000000"  # Download everything, we will chose later
    query_parameters['photonOrExtendedOrNone'] = data_type
    query_parameters['destination'] = 'query'
    query_parameters['spacecraft'] = 'checked'

    # Compute a unique ID for this query
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1 = None
    prev_downloaded_ft2 = None

    for ft1 in ft1s:

        with pyfits.open(ft1) as f:

            this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:

                # Found one!
                prev_downloaded_ft1 = ft1
                break

    # If there is no FT1 file there shouldn't be any FT2 file either,
    # so the FT2 files are inspected only after an FT1 match
    if prev_downloaded_ft1 is not None:

        for ft2 in ft2s:

            with pyfits.open(ft2) as f:

                this_query_uid = f[0].header.get(_uid_fits_keyword)

                if this_query_uid == query_unique_id:

                    # Found one!
                    prev_downloaded_ft2 = ft2
                    break

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if prev_downloaded_ft1 is not None and prev_downloaded_ft2 is not None:

        print("Existing event file %s and Spacecraft file %s correspond to the same selection. "
              "We assume you did not tamper with them, so we will return those instead of downloading them again. "
              "If you want to download them again, remove them from the outdir" % (prev_downloaded_ft1,
                                                                                   prev_downloaded_ft2))

        return [prev_downloaded_ft1, prev_downloaded_ft2]

    # Print them out
    print("Query parameters:")

    for k, v in query_parameters.items():

        print("%30s = %s" % (k, v))

    # POST encoding (the payload of a POST request must be bytes)
    postData = urllib.parse.urlencode(query_parameters).encode('utf-8')
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present
    try:

        os.remove(temporaryFileName)

    except OSError:

        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:

        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)

    except socket.timeout:

        raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)

    except Exception as download_error:

        raise RuntimeError("Problems with the download. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url) from download_error

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:

        html = " ".join(htmlFile.readlines()).strip()

    os.remove(temporaryFileName)

    # Extract data from the response; fall back to a second div
    # when the first one yields no data
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    if parser.data == []:

        parser = DivParser("right-side")
        parser.feed(html)

    try:

        # Get line containing the time estimation
        estimatedTimeLine = [x for x in parser.data
                             if x.find("The estimated time for your query to complete is") == 0][0]

        # Get the time estimate
        estimatedTimeForTheQuery = re.findall('The estimated time for your query to complete is ([0-9]+) seconds',
                                              estimatedTimeLine)[0]

    except Exception as parse_error:

        raise RuntimeError("Problems with the download. Empty or wrong answer from the LAT server. "
                           "Please retry later.") from parse_error

    else:

        print("\nEstimated complete time for your query: %s seconds" % estimatedTimeForTheQuery)

    http_address = [x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0][0]

    print("\nIf this download fails, you can find your data at %s (when ready)\n" % http_address)

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # Seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # Seconds

    # precompile Url regular expression
    regexpr = re.compile("wget (.*.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:

        # Try and fetch the html with the results
        try:

            _ = urllib.request.urlretrieve(http_address, fakeName)

        except socket.timeout:

            urllib.request.urlcleanup()

            raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that "
                               "you can access %s, then retry" % threeML_config['LAT']['query form'])

        except Exception as download_error:

            urllib.request.urlcleanup()

            raise RuntimeError("Problems with the download. Check your connection or that you can access "
                               "%s, then retry." % threeML_config['LAT']['query form']) from download_error

        with open(fakeName) as f:

            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == '2':

            # Success! Get the download link
            links = regexpr.findall(html)

            # Remove temp file
            os.remove(fakeName)

            # we're done
            break

        else:

            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config['LAT']['public HTTP location']

    # links is still None when the query did not succeed before the timeout
    if links is not None:

        filenames = [x.split('/')[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)

        downloaded_files = [downloader.download(filename, destination_directory) for filename in filenames]

    else:

        raise RuntimeError("Could not download LAT Standard data")

    # Now we need to sort so that the FT1 is always first (they might be out of order)
    # If FT2 is first, switch them, otherwise do nothing
    if re.match('.+SC[0-9][0-9].fits', downloaded_files[0]) is not None:

        # The FT2 is first, flip them
        downloaded_files = downloaded_files[::-1]

    # Finally, open the FITS file and write the unique key for this query, so that the download will not be
    # repeated if not necessary
    for fits_file in downloaded_files:

        with pyfits.open(fits_file, mode='update') as f:

            f[0].header.set(_uid_fits_keyword, query_unique_id)

    return downloaded_files
def from_root_file(cls, response_file_name):
    """
    Build response from a ROOT file. Do not use directly, use the hawc_response_factory function instead.

    :param response_file_name: path of the ROOT file containing the response
    :return: a HAWCResponse instance
    :raise IOError: if the file does not exist or is not readable
    """

    from ..root_handler import open_ROOT_file, get_list_of_keys, tree_to_ndarray

    # Make sure file is readable
    response_file_name = sanitize_filename(response_file_name)

    # Check that it exists and can be read
    if not file_existing_and_readable(response_file_name):  # pragma: no cover

        raise IOError("Response %s does not exist or is not readable" % response_file_name)

    # Read response
    with open_ROOT_file(response_file_name) as root_file:

        # Get the name of the trees
        object_names = get_list_of_keys(root_file)

        # Make sure we have all the things we need
        assert 'LogLogSpectrum' in object_names
        assert 'DecBins' in object_names
        assert 'AnalysisBins' in object_names

        # Read spectrum used during the simulation
        log_log_spectrum = root_file.Get("LogLogSpectrum")

        # Get the analysis bins definition
        dec_bins_ = tree_to_ndarray(root_file.Get("DecBins"))

        dec_bins_lower_edge = dec_bins_['lowerEdge']  # type: np.ndarray
        dec_bins_upper_edge = dec_bins_['upperEdge']  # type: np.ndarray
        dec_bins_center = dec_bins_['simdec']  # type: np.ndarray

        # Materialize the triplets: the sequence is measured, iterated here and
        # then handed to the constructor, which a one-shot zip iterator cannot support
        dec_bins = list(zip(dec_bins_lower_edge, dec_bins_center, dec_bins_upper_edge))

        # Read in the ids of the response bins ("analysis bins" in LiFF jargon),
        # trying the "name" branch first and the "id" branch second
        try:

            response_bins_ids = tree_to_ndarray(root_file.Get("AnalysisBins"), "name")  # type: np.ndarray

        except ValueError:

            try:

                response_bins_ids = tree_to_ndarray(root_file.Get("AnalysisBins"), "id")  # type: np.ndarray

            except ValueError:

                # Some old response files (or energy responses) have no "name" branch
                custom_warnings.warn("Response %s has no AnalysisBins 'id' or 'name' branch. "
                                     "Will try with default names" % response_file_name)

                response_bins_ids = None

        # Only convert ids that were actually read: when neither branch is
        # available the default names are generated inside the loop below
        if response_bins_ids is not None:

            response_bins_ids = response_bins_ids.astype(str)

        # Now we create a dictionary of ResponseBin instances for each dec bin_name
        response_bins = collections.OrderedDict()

        for dec_id, (min_dec, dec_center, max_dec) in enumerate(dec_bins):

            this_response_bins = collections.OrderedDict()

            # If we couldn't get the response_bins_ids above, let's use the default names
            if response_bins_ids is None:

                # Default are just integers. let's read how many nHit bins are from the first dec bin
                dec_id_label = "dec_%02i" % dec_id

                n_energy_bins = root_file.Get(dec_id_label).GetNkeys()

                response_bins_ids = range(n_energy_bins)

            for response_bin_id in response_bins_ids:

                this_response_bin = ResponseBin.from_ttree(root_file, dec_id, response_bin_id, log_log_spectrum,
                                                           min_dec, dec_center, max_dec)

                this_response_bins[response_bin_id] = this_response_bin

            # The per-declination dictionaries are keyed by the center of the dec bin
            response_bins[dec_center] = this_response_bins

    # Now the file is closed. Let's explicitly remove the reference so we are sure it is freed
    del root_file

    # Instance the class and return it
    instance = cls(response_file_name, dec_bins, response_bins)

    return instance
def download_LAT_data(ra,
                      dec,
                      radius,
                      tstart,
                      tstop,
                      time_type,
                      data_type='Photon',
                      destination_directory="."):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than what you will need in the
    analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
    Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the path to the downloaded FT1 and FT2 file
    :raise RuntimeError: if the server cannot be reached, answers with an unexpected page, or the query does not
    complete before the timeout
    """
    _known_time_types = ['MET', 'Gregorian', 'MJD']

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(
        _known_time_types)

    valid_classes = ['Photon', 'Extended']
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(
        valid_classes)

    assert radius > 0, "Radius of the Region of Interest must be > 0"

    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory,
                                              abspath=True)

    if not os.path.exists(destination_directory):

        os.makedirs(destination_directory)

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config['LAT']['query form']

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters['coordfield'] = "%.4f,%.4f" % (ra, dec)
    query_parameters['coordsystem'] = "J2000"
    query_parameters['shapefield'] = "%s" % radius
    query_parameters['timefield'] = "%s,%s" % (tstart, tstop)
    query_parameters['timetype'] = "%s" % time_type
    query_parameters['energyfield'] = "30,1000000"  # Download everything, we will chose later
    query_parameters['photonOrExtendedOrNone'] = data_type
    query_parameters['destination'] = 'query'
    query_parameters['spacecraft'] = 'checked'

    # Compute a unique ID for this query
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1 = None
    prev_downloaded_ft2 = None

    for ft1 in ft1s:

        with pyfits.open(ft1) as f:

            this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:

                # Found one!
                prev_downloaded_ft1 = ft1
                break

    # If there is no FT1 file there shouldn't be any FT2 file either,
    # so the FT2 files are inspected only after an FT1 match
    if prev_downloaded_ft1 is not None:

        for ft2 in ft2s:

            with pyfits.open(ft2) as f:

                this_query_uid = f[0].header.get(_uid_fits_keyword)

                if this_query_uid == query_unique_id:

                    # Found one!
                    prev_downloaded_ft2 = ft2
                    break

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if prev_downloaded_ft1 is not None and prev_downloaded_ft2 is not None:

        print(
            "Existing event file %s and Spacecraft file %s correspond to the same selection. "
            "We assume you did not tamper with them, so we will return those instead of downloading them again. "
            "If you want to download them again, remove them from the outdir"
            % (prev_downloaded_ft1, prev_downloaded_ft2))

        return [prev_downloaded_ft1, prev_downloaded_ft2]

    # Print them out
    print("Query parameters:")

    for k, v in query_parameters.items():

        print("%30s = %s" % (k, v))

    # POST encoding (the payload of a POST request must be bytes)
    postData = urllib.parse.urlencode(query_parameters).encode('utf-8')
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present
    try:

        os.remove(temporaryFileName)

    except OSError:

        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:

        urllib.request.urlretrieve(url, temporaryFileName,
                                   lambda x, y, z: 0, postData)

    except socket.timeout:

        raise RuntimeError(
            "Time out when connecting to the server. Check your internet connection, or that the "
            "form at %s is accessible, then retry" % url)

    except Exception as e:

        print(e)

        raise RuntimeError(
            "Problems with the download. Check your internet connection, or that the "
            "form at %s is accessible, then retry" % url) from e

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:

        html = " ".join(htmlFile.readlines()).strip()

    os.remove(temporaryFileName)

    # Extract data from the response; fall back to a second div
    # when the first one yields no data
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    if parser.data == []:

        parser = DivParser("right-side")
        parser.feed(html)

    try:

        # Get line containing the time estimation
        estimatedTimeLine = [
            x for x in parser.data
            if x.find("The estimated time for your query to complete is") == 0
        ][0]

        # Get the time estimate
        estimatedTimeForTheQuery = re.findall(
            'The estimated time for your query to complete is ([0-9]+) seconds',
            estimatedTimeLine)[0]

    except Exception as parse_error:

        raise RuntimeError(
            "Problems with the download. Empty or wrong answer from the LAT server. "
            "Please retry later.") from parse_error

    else:

        print("\nEstimated complete time for your query: %s seconds" %
              estimatedTimeForTheQuery)

    http_address = [
        x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0
    ][0]

    print(
        "\nIf this download fails, you can find your data at %s (when ready)\n"
        % http_address)

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # Seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # Seconds

    # precompile Url regular expression
    regexpr = re.compile("wget (.*.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:

        # Try and fetch the html with the results
        try:

            _ = urllib.request.urlretrieve(
                http_address,
                fakeName,
            )

        except socket.timeout:

            urllib.request.urlcleanup()

            raise RuntimeError(
                "Time out when connecting to the server. Check your internet connection, or that "
                "you can access %s, then retry" %
                threeML_config['LAT']['query form'])

        except Exception as e:

            print(e)

            urllib.request.urlcleanup()

            raise RuntimeError(
                "Problems with the download. Check your connection or that you can access "
                "%s, then retry." % threeML_config['LAT']['query form']) from e

        with open(fakeName) as f:

            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == '2':

            # Success! Get the download link
            links = regexpr.findall(html)

            # Remove temp file
            os.remove(fakeName)

            # we're done
            break

        else:

            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config['LAT']['public HTTP location']

    # links is still None when the query did not succeed before the timeout
    if links is not None:

        filenames = [x.split('/')[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)

        downloaded_files = [
            downloader.download(filename, destination_directory)
            for filename in filenames
        ]

    else:

        raise RuntimeError("Could not download LAT Standard data")

    # Now we need to sort so that the FT1 is always first (they might be out of order)
    # If FT2 is first, switch them, otherwise do nothing
    if re.match('.+SC[0-9][0-9].fits', downloaded_files[0]) is not None:

        # The FT2 is first, flip them
        downloaded_files = downloaded_files[::-1]

    # Finally, open the FITS file and write the unique key for this query, so that the download will not be
    # repeated if not necessary
    for fits_file in downloaded_files:

        with pyfits.open(fits_file, mode='update') as f:

            f[0].header.set(_uid_fits_keyword, query_unique_id)

    return downloaded_files
def __init__(self, rsp_file: str, arf_file: Optional[str] = None) -> None:
    """
    Read a response matrix (and optionally an ARF) from OGIP FITS files.

    :param rsp_file: path of the response file. A specific response can be
        selected with the syntax ``file{number}``; the number is then used as
        the second element of the ``(EXTNAME, number)`` key when indexing the
        FITS file. Without that syntax the number defaults to 1.
    :param arf_file: optional path of an ancillary response file. ``None`` or
        the string "none" (case-insensitive) means that no ARF is read.
    :raises RuntimeError: if the response file does not pass the
        ``fits_file_existing_and_readable`` check
    """

    # Make sure that the response file exists before doing anything else
    rsp_path: Path = sanitize_filename(rsp_file)

    if not fits_file_existing_and_readable(rsp_path):

        msg = f"OGIPResponse file {rsp_path} not existing or not readable"

        log.error(msg)

        # Carry the message in the exception too, so that callers who do not
        # look at the log still learn which file was the problem
        raise RuntimeError(msg)

    # Check if we are dealing with a .rsp2 file (containing more than
    # one response). This is checked by looking for the syntax
    # [responseFile]{[responseNumber]}
    if "{" in str(rsp_path):

        tokens = str(rsp_path).split("{")

        rsp_path = sanitize_filename(tokens[0])
        rsp_number = int(tokens[-1].split("}")[0].replace(" ", ""))

    else:

        rsp_number = 1

    self._rsp_file: Path = rsp_path

    # Read the response
    with pyfits.open(rsp_path) as f:

        try:

            # This is usually when the response file contains only the
            # energy dispersion
            data = f["MATRIX", rsp_number].data
            header = f["MATRIX", rsp_number].header

        except Exception as exc:

            available = " ".join(
                repr(hdu.header.get("EXTNAME")) for hdu in f)

            log.warning(
                f"The default choice for MATRIX extension failed: {exc!r}; "
                f"available: {available}")

            # Other detectors might use the SPECRESP MATRIX name instead,
            # usually when the response has been already convoluted with
            # the effective area.
            # Note that here we are not catching any exception, because
            # we have to fail if we cannot read the matrix
            data = f["SPECRESP MATRIX", rsp_number].data
            header = f["SPECRESP MATRIX", rsp_number].header

        else:

            if arf_file is None:

                log.warning(
                    "The response is in an extension called MATRIX, which usually means you also "
                    "need an ancillary file (ARF) which you didn't provide. You should refer to the "
                    "documentation of the instrument and make sure you don't need an ARF."
                )

        # These 3 operations must be executed when the file is still open
        matrix = self._read_matrix(data, header)
        ebounds = self._read_ebounds(f["EBOUNDS"])
        mc_channels = self._read_mc_channels(data)

    # Now, if there is information on the coverage interval, let's use it
    header_start = header.get("TSTART", None)
    header_stop = header.get("TSTOP", None)

    if header_start is not None and header_stop is not None:

        super(OGIPResponse, self).__init__(
            matrix=matrix,
            ebounds=ebounds,
            monte_carlo_energies=mc_channels,
            coverage_interval=TimeInterval(header_start, header_stop),
        )

    else:

        super(OGIPResponse, self).__init__(matrix=matrix,
                                           ebounds=ebounds,
                                           monte_carlo_energies=mc_channels)

    # Read the ARF if there is any
    # NOTE: this has to happen *after* calling the parent constructor
    self._arf_file: Optional[str] = None

    if arf_file is not None and str(arf_file).lower() != "none":

        self._read_arf_file(arf_file)
def from_root_file(map_tree_file, roi):
    """
    Read the data analysis bins of a map tree stored in a ROOT file.

    Do not call this directly, go through map_tree_factory instead.

    :param map_tree_file: path of the ROOT file containing the map tree
    :param roi: a HealpixROIBase instance, or None to read the entire sky
    :return: an ordered dictionary with one DataAnalysisBin per bin name
    """

    from ..root_handler import open_ROOT_file, root_numpy, tree_to_ndarray

    map_tree_file = sanitize_filename(map_tree_file)

    # The file must exist and be readable
    if not file_existing_and_readable(map_tree_file):  # pragma: no cover

        raise IOError("MapTree %s does not exist or is not readable" % map_tree_file)

    # Only a proper ROI object or None is accepted
    assert roi is None or isinstance(roi, HealpixROIBase), "You have to provide an ROI choosing from the " \
                                                           "available ROIs in the region_of_interest module"

    if roi is None:

        custom_warnings.warn("You have set roi=None, so you are reading the entire sky")

    def _user_info_value(tobject, key):
        # Fetch one of the values attached to the tree as user info
        return tobject.GetUserInfo().FindObject(key).GetVal()

    # One Healpix map pair (data, background) for each analysis bin
    data_analysis_bins = collections.OrderedDict()

    with open_ROOT_file(map_tree_file) as f:

        data_bins_labels = list(root_numpy.tree2array(f.Get("BinInfo"), "name"))

        # totalDuration is in hours and a transit is defined as 1 day.
        # The map-maker underestimates the livetime of bins with low statistic
        # (time intervals with zero events are removed), so the largest value
        # among the bins is taken as the best estimate.
        transits_per_bin = root_numpy.tree2array(f.Get("BinInfo"), "totalDuration") / 24.0
        n_transits = max(transits_per_bin)

        for name in data_bins_labels:

            data_tobject = _get_bin_object(f, name, "data")
            bkg_tobject = _get_bin_object(f, name, "bkg")

            # Data and background must share resolution and ordering scheme
            nside = _user_info_value(data_tobject, "Nside")
            nside_bkg = _user_info_value(bkg_tobject, "Nside")

            assert nside == nside_bkg

            scheme = _user_info_value(data_tobject, "Scheme")
            scheme_bkg = _user_info_value(bkg_tobject, "Scheme")

            assert scheme == scheme_bkg

            assert scheme == 0, "NESTED scheme is not supported yet"

            if roi is None:

                # No ROI: load the whole sky as dense maps
                all_counts = tree_to_ndarray(data_tobject, "count").astype(np.float64)
                all_bkg = tree_to_ndarray(bkg_tobject, "count").astype(np.float64)

                data_analysis_bins[name] = DataAnalysisBin(name,
                                                           DenseHealpix(all_counts),
                                                           DenseHealpix(all_bkg),
                                                           active_pixels_ids=None,
                                                           n_transits=n_transits,
                                                           scheme='RING')

            else:

                # Load only the pixels that belong to the ROI
                active_pixels = roi.active_pixels(nside, system='equatorial', ordering='RING')

                roi_counts = _read_partial_tree(data_tobject, active_pixels)
                roi_bkg = _read_partial_tree(bkg_tobject, active_pixels)

                data_analysis_bins[name] = DataAnalysisBin(name,
                                                           SparseHealpix(roi_counts, active_pixels, nside),
                                                           SparseHealpix(roi_bkg, active_pixels, nside),
                                                           active_pixels_ids=active_pixels,
                                                           n_transits=n_transits,
                                                           scheme='RING')

    return data_analysis_bins
def restore_fit(self, filename):
    """
    Reload a polynomial fit from an HDF store and rebuild the related state.

    The store is expected to hold a 'coefficients' and a 'covariance' entry,
    with the fit metadata attached to the 'coefficients' storer.

    :param filename: path of the HDF store to read
    """

    store_path = sanitize_filename(filename)

    with HDFStore(store_path) as store:

        coefficients = store['coefficients']
        covariance = store['covariance']

        # Rebuild one polynomial per stored row
        self._polynomials = []

        for row in range(len(coefficients)):

            row_values = np.array(coefficients.loc[row])

            # pandas pads the coefficients that are not needed with nans:
            # keeping only the finite ones gives back the right order
            finite_values = row_values[np.isfinite(row_values)]

            self._polynomials.append(
                Polynomial.from_previous_fit(finite_values, covariance.loc[row]))

        metadata = store.get_storer('coefficients').attrs.metadata

        self._optimal_polynomial_grade = metadata['poly_order']

        selections = np.array(metadata['poly_selections'])

        self._poly_intervals = TimeIntervalSet.from_starts_and_stops(
            selections[:, 0], selections[:, 1])

        self._unbinned = metadata['unbinned']

        self._fit_method_info['bin type'] = 'unbinned' if self._unbinned else 'binned'

        self._fit_method_info['fit method'] = metadata['fit_method']

    self._poly_fit_exists = True

    # Collect again exposure and counts of the polynomial selections,
    # so that they can be extracted if needed
    self._poly_exposure = 0.
    self._poly_selected_counts = []

    for interval in self._poly_intervals:

        start, stop = interval.start_time, interval.stop_time

        self._poly_selected_counts.append(
            self.count_per_channel_over_interval(start, stop))

        self._poly_exposure += self.exposure_over_interval(start, stop)

    self._poly_selected_counts = np.sum(self._poly_selected_counts, axis=0)

    # Re-apply the active time selection, if there is one
    if self._time_selection_exists:

        active_selections = self._time_intervals.to_string().split(',')

        self.set_active_time_intervals(*active_selections)
def download_LLE_trigger_data(trigger_name, destination_directory='.'):
    """
    Download the latest Fermi LAT LLE and RSP files from the HEASARC server. Will get the
    latest file versions. If the files already exist in your destination directory, they will be skipped
    in the download process. The output dictionary can be used as input to the FermiLATLLELike class.

    example usage: download_LLE_trigger_data('080916009', destination_directory='.')

    :param trigger_name: trigger number (str) with no leading letter e.g. '080916009'
    :param destination_directory: download directory
    :return: a dictionary with information about the download
    :raises TriggerDoesNotExist: if the directory of the trigger is not found on the server
    """

    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if_directory_not_existing_then_make(destination_directory)

    # Figure out the directory on the server
    url = threeML_config['LAT']['public HTTP location']

    # The first two digits of the trigger name are the last two of the year
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = 'triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:

        downloader = ApacheDirectory(heasarc_web_page_url)

    except RemoteDirectoryNotFound as err:

        raise TriggerDoesNotExist("Trigger %s does not exist at %s" %
                                  (sanitized_trigger_name_, heasarc_web_page_url)) from err

    # Download only the lle, pt, cspec and rsp file (i.e., do not get all the png, pdf and so on).
    # Raw string: '\.' is not a valid escape sequence in a normal string literal
    pattern = r'gll_(lle|pt|cspec)_bn.+\.(fit|rsp|pha)'

    # NOTE(review): the directory was already sanitized above, so this second
    # pass looks redundant -- confirm sanitize_filename is idempotent and drop it
    destination_directory_sanitized = sanitize_filename(destination_directory)

    downloaded_files = downloader.download_all_files(destination_directory_sanitized,
                                                     progress=True,
                                                     pattern=pattern)

    # Put the files in a structured dictionary
    download_info = DictWithPrettyPrint()

    for download in downloaded_files:

        file_name = os.path.basename(download)

        file_type = _file_type_match.match(file_name).group(1)

        if file_type == 'cspec':

            # a cspec file can be 2 things: a CSPEC spectral set (with .pha) extension,
            # or a response matrix (with a .rsp extension)
            ext = os.path.splitext(file_name)[1]

            if ext == '.rsp':

                file_type = 'rsp'

            elif ext == '.pha':

                file_type = 'cspec'

            else:

                raise RuntimeError("Should never get here")

        # The pt file is really an ft2 file
        if file_type == 'pt':

            file_type = 'ft2'

        download_info[file_type] = download

    return download_info