def download_LAT_data(ra, dec, radius, tstart, tstop, time_type, data_type='Photon', destination_directory="."):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than what you will need in the
    analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
    Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the path to the downloaded FT1 and FT2 file
    """
    _known_time_types = ['MET', 'Gregorian', 'MJD']

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(_known_time_types)

    valid_classes = ['Photon', 'Extended']
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(valid_classes)

    assert radius > 0, "Radius of the Region of Interest must be > 0"
    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config['LAT']['query form']

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters['coordfield'] = "%.4f,%.4f" % (ra, dec)
    query_parameters['coordsystem'] = "J2000"
    query_parameters['shapefield'] = "%s" % radius
    query_parameters['timefield'] = "%s,%s" % (tstart, tstop)
    query_parameters['timetype'] = "%s" % time_type
    query_parameters['energyfield'] = "30,1000000"  # Download everything, we will chose later
    query_parameters['photonOrExtendedOrNone'] = data_type
    query_parameters['destination'] = 'query'
    query_parameters['spacecraft'] = 'checked'

    # Compute a unique ID for this query (deterministic, so the same selection always
    # maps to the same tag and previously-downloaded files can be recognized)
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1 = None
    prev_downloaded_ft2 = None

    for ft1 in ft1s:
        with pyfits.open(ft1) as f:
            this_query_uid = f[0].header.get(_uid_fits_keyword)

        if this_query_uid == query_unique_id:
            # Found one!
            prev_downloaded_ft1 = ft1
            break

    # Only look for a matching FT2 if an FT1 was found: if there is no FT1 file
    # there shouldn't be any FT2 file either
    if prev_downloaded_ft1 is not None:
        for ft2 in ft2s:
            with pyfits.open(ft2) as f:
                this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:
                # Found one!
                prev_downloaded_ft2 = ft2
                break

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if prev_downloaded_ft1 is not None and prev_downloaded_ft2 is not None:
        print("Existing event file %s and Spacecraft file %s correspond to the same selection. "
              "We assume you did not tamper with them, so we will return those instead of downloading them again. "
              "If you want to download them again, remove them from the outdir" %
              (prev_downloaded_ft1, prev_downloaded_ft2))

        return [prev_downloaded_ft1, prev_downloaded_ft2]

    # Print them out
    print("Query parameters:")

    for k, v in query_parameters.items():
        print("%30s = %s" % (k, v))

    # POST encoding
    postData = urllib.parse.urlencode(query_parameters).encode('utf-8')
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present (best effort: a missing file is fine)
    try:
        os.remove(temporaryFileName)
    except OSError:
        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:
        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)
    except socket.timeout:
        raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)
    except Exception as e:
        print(e)
        raise RuntimeError("Problems with the download. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:
        html = " ".join(htmlFile).strip()

    os.remove(temporaryFileName)

    # Extract data from the response
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    # Fall back to the alternative page layout if the first div was not found
    if parser.data == []:
        parser = DivParser("right-side")
        parser.feed(html)

    try:
        # Get line containing the time estimation
        estimatedTimeLine = [x for x in parser.data
                             if x.find("The estimated time for your query to complete is") == 0][0]

        # Get the time estimate
        estimatedTimeForTheQuery = re.findall('The estimated time for your query to complete is ([0-9]+) seconds',
                                              estimatedTimeLine)[0]
    except IndexError:
        # The expected sentence is not in the page: the server returned an empty or error page
        raise RuntimeError("Problems with the download. Empty or wrong answer from the LAT server. "
                           "Please retry later.")
    else:
        print("\nEstimated complete time for your query: %s seconds" % estimatedTimeForTheQuery)

    http_address = [x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0][0]

    print("\nIf this download fails, you can find your data at %s (when ready)\n" % http_address)

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # Seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # Seconds

    # precompile Url regular expression (dot escaped so only ".fits" matches literally)
    regexpr = re.compile(r"wget (.*\.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:
        # Try and fetch the html with the results
        try:
            _ = urllib.request.urlretrieve(http_address, fakeName, )
        except socket.timeout:
            urllib.request.urlcleanup()
            raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that "
                               "you can access %s, then retry" % threeML_config['LAT']['query form'])
        except Exception as e:
            print(e)
            urllib.request.urlcleanup()
            raise RuntimeError("Problems with the download. Check your connection or that you can access "
                               "%s, then retry." % threeML_config['LAT']['query form'])

        with open(fakeName) as f:
            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == '2':
            # Success! Get the download link
            links = regexpr.findall(html)

            # Remove temp file
            os.remove(fakeName)

            # we're done
            break
        else:
            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config['LAT']['public HTTP location']

    if links is not None:
        filenames = [x.split('/')[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)
        downloaded_files = [downloader.download(filename, destination_directory) for filename in filenames]
    else:
        raise RuntimeError("Could not download LAT Standard data")

    # Now we need to sort so that the FT1 is always first (they might be out of order)
    # If FT2 is first, switch them, otherwise do nothing
    if re.match(r'.+SC[0-9][0-9]\.fits', downloaded_files[0]) is not None:
        # The FT2 is first, flip them
        downloaded_files = downloaded_files[::-1]

    # Finally, open the FITS file and write the unique key for this query, so that the download will not be
    # repeated if not necessary
    for fits_file in downloaded_files:
        with pyfits.open(fits_file, mode='update') as f:
            f[0].header.set(_uid_fits_keyword, query_unique_id)

    return downloaded_files
def download_LLE_trigger_data(trigger_name, destination_directory='.'):
    """
    Download the latest Fermi LAT LLE and RSP files from the HEASARC server. Will get the
    latest file versions. If the files already exist in your destination directory, they will be skipped
    in the download process. The output dictionary can be used as input to the FermiLATLLELike class.

    example usage: download_LLE_trigger_data('080916009', destination_directory='.')

    :param trigger_name: trigger number (str) with no leading letter e.g. '080916009'
    :param destination_directory: download directory
    :return: a dictionary with information about the download
    """

    # Normalize whatever the user passed into the canonical trigger string
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # Make sure the output directory exists, creating it if necessary
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Build the URL of the trigger directory on the server:
    # <public HTTP location>/triggers/<year>/bn<trigger>/current
    base_url = threeML_config['LAT']['public HTTP location']
    trigger_year = '20%s' % sanitized_trigger_name_[:2]
    server_directory = 'triggers/%s/bn%s/current' % (trigger_year, sanitized_trigger_name_)
    heasarc_web_page_url = '%s/%s' % (base_url, server_directory)

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        raise TriggerDoesNotExist("Trigger %s does not exist at %s" %
                                  (sanitized_trigger_name_, heasarc_web_page_url))

    # Download only the lle, pt, cspec and rsp file (i.e., do not get all the png, pdf and so on)
    pattern = 'gll_(lle|pt|cspec)_bn.+\.(fit|rsp|pha)'

    destination_directory_sanitized = sanitize_filename(destination_directory)

    downloaded_files = downloader.download_all_files(destination_directory_sanitized,
                                                     progress=True,
                                                     pattern=pattern)

    # Sort the downloaded files into a structured dictionary keyed by file type
    download_info = DictWithPrettyPrint()

    for local_path in downloaded_files:

        basename = os.path.basename(local_path)
        file_type = _file_type_match.match(basename).group(1)

        if file_type == 'cspec':

            # a cspec file can be 2 things: a CSPEC spectral set (with .pha) extension,
            # or a response matrix (with a .rsp extension)
            extension = os.path.splitext(basename)[1]

            if extension == '.rsp':
                file_type = 'rsp'
            elif extension == '.pha':
                file_type = 'cspec'
            else:
                raise RuntimeError("Should never get here")

        elif file_type == 'pt':
            # The pt file is really an ft2 file
            file_type = 'ft2'

        download_info[file_type] = local_path

    return download_info
def download_GBM_trigger_data(trigger_name, detectors=None, destination_directory='.', compress_tte=True):
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'], destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any); with no list, default to every detector
    if detectors is not None:

        for det in detectors:

            assert det in _detector_list, "Detector %s in the provided list is not a valid detector. " \
                                          "Valid choices are: %s" % (det, _detector_list)

    else:

        detectors = list(_detector_list)

    # Open heasarc web page: the trigger directory lives at
    # <public HTTP location>/triggers/<year>/bn<trigger>/current
    url = threeML_config['gbm']['public HTTP location']
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = '/triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:

        downloader = ApacheDirectory(heasarc_web_page_url)

    except RemoteDirectoryNotFound:

        raise TriggerDoesNotExist("Trigger %s does not exist at %s" % (sanitized_trigger_name_,
                                                                       heasarc_web_page_url))

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    # (one sub-dict per detector, keyed 'cspec'/'rsp'/'rsp2'/'tte')
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:

            # Not a data file
            continue

        else:

            # The "map" is necessary to transform the tokens to normal string (instead of unicode),
            # because u"b0" != "b0" as a key for a dictionary
            _, file_type, detname, _, version_ext = map(str, tokens)

            version, ext = version_ext.split(".")

            # We do not care here about the other files (tcat, bcat and so on),
            # nor about files which pertain to other detectors
            if file_type not in ['cspec', 'tte'] or ext not in ['rsp','rsp2','pha','fit'] or detname not in detectors:

                continue

            # cspec files can be rsp, rsp2 or pha files. Classify them
            if file_type == 'cspec':

                if ext == 'rsp':

                    remote_files_info[detname]['rsp'] = this_file

                elif ext == 'rsp2':

                    remote_files_info[detname]['rsp2'] = this_file

                elif ext == 'pha':

                    # The pha extension marks the actual CSPEC spectral set
                    remote_files_info[detname]['cspec'] = this_file

                else:

                    raise RuntimeError("Should never get here")

            else:

                remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint()) for det in detectors])

    for detector in remote_files_info.keys():

        remote_detector_info = remote_files_info[detector]
        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info['cspec'] = downloader.download(remote_detector_info['cspec'], destination_directory,
                                                           progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        # NOTE(review): if neither was found on the server this raises KeyError — presumably
        # every trigger directory always provides at least an rsp; confirm against the server layout
        if 'rsp2' in remote_detector_info:

            local_detector_info['rsp'] = downloader.download(remote_detector_info['rsp2'], destination_directory,
                                                             progress=True)

        else:

            local_detector_info['rsp'] = downloader.download(remote_detector_info['rsp'], destination_directory,
                                                             progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info['tte'] = downloader.download(remote_detector_info['tte'], destination_directory,
                                                         progress=True, compress=compress_tte)

    return download_info
def download_GBM_trigger_data(trigger_name, detectors=None, destination_directory='.', compress_tte=True):
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'], destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any); with no list, default to every detector
    if detectors is not None:
        for det in detectors:
            assert det in _detector_list, "Detector %s in the provided list is not a valid detector. " \
                                          "Valid choices are: %s" % (det, _detector_list)
    else:
        detectors = list(_detector_list)

    # Open heasarc web page: the trigger directory lives at
    # <public HTTP location>/triggers/<year>/bn<trigger>/current
    url = threeML_config['gbm']['public HTTP location']
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = '/triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        raise TriggerDoesNotExist("Trigger %s does not exist at %s" %
                                  (sanitized_trigger_name_, heasarc_web_page_url))

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    # (one sub-dict per detector, keyed 'cspec'/'rsp'/'rsp2'/'tte')
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # How a 'cspec' file is classified depending on its extension
    # (the .pha extension marks the actual CSPEC spectral set)
    cspec_type_for_ext = {'rsp': 'rsp', 'rsp2': 'rsp2', 'pha': 'cspec'}

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:
            # Not a data file
            continue

        _, file_type, detname, _, version_ext = tokens
        _version, ext = version_ext.split(".")

        # We do not care here about the other files (tcat, bcat and so on),
        # nor about files which pertain to other detectors
        if file_type not in ('cspec', 'tte') or ext not in ('rsp', 'rsp2', 'pha', 'fit') \
                or detname not in detectors:
            continue

        if file_type == 'cspec':

            # cspec files can be rsp, rsp2 or pha files. Classify them
            if ext not in cspec_type_for_ext:
                raise RuntimeError("Should never get here")

            remote_files_info[detname][cspec_type_for_ext[ext]] = this_file

        else:

            remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint())
                                         for det in detectors])

    for detector, remote_detector_info in remote_files_info.items():

        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info['cspec'] = downloader.download(remote_detector_info['cspec'],
                                                           destination_directory,
                                                           progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        rsp_key = 'rsp2' if 'rsp2' in remote_detector_info else 'rsp'
        local_detector_info['rsp'] = downloader.download(remote_detector_info[rsp_key],
                                                         destination_directory,
                                                         progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info['tte'] = downloader.download(remote_detector_info['tte'],
                                                         destination_directory,
                                                         progress=True,
                                                         compress=compress_tte)

    return download_info
def download_LAT_data(ra, dec, radius, tstart, tstop, time_type, data_type='Photon', destination_directory="."):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than what you will need in the
    analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
    Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the path to the downloaded FT1 and FT2 file
    """
    _known_time_types = ['MET', 'Gregorian', 'MJD']

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(_known_time_types)

    valid_classes = ['Photon', 'Extended']
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(valid_classes)

    assert radius > 0, "Radius of the Region of Interest must be > 0"
    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config['LAT']['query form']

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters['coordfield'] = "%.4f,%.4f" % (ra, dec)
    query_parameters['coordsystem'] = "J2000"
    query_parameters['shapefield'] = "%s" % radius
    query_parameters['timefield'] = "%s,%s" % (tstart, tstop)
    query_parameters['timetype'] = "%s" % time_type
    query_parameters['energyfield'] = "30,1000000"  # Download everything, we will chose later
    query_parameters['photonOrExtendedOrNone'] = data_type
    query_parameters['destination'] = 'query'
    query_parameters['spacecraft'] = 'checked'

    # Compute a unique ID for this query (deterministic, so the same selection always
    # maps to the same tag and previously-downloaded files can be recognized)
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1 = None
    prev_downloaded_ft2 = None

    for ft1 in ft1s:
        with pyfits.open(ft1) as f:
            this_query_uid = f[0].header.get(_uid_fits_keyword)

        if this_query_uid == query_unique_id:
            # Found one!
            prev_downloaded_ft1 = ft1
            break

    # Only look for a matching FT2 if an FT1 was found: if there is no FT1 file
    # there shouldn't be any FT2 file either
    if prev_downloaded_ft1 is not None:
        for ft2 in ft2s:
            with pyfits.open(ft2) as f:
                this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:
                # Found one!
                prev_downloaded_ft2 = ft2
                break

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if prev_downloaded_ft1 is not None and prev_downloaded_ft2 is not None:
        print("Existing event file %s and Spacecraft file %s correspond to the same selection. "
              "We assume you did not tamper with them, so we will return those instead of downloading them again. "
              "If you want to download them again, remove them from the outdir" %
              (prev_downloaded_ft1, prev_downloaded_ft2))

        return [prev_downloaded_ft1, prev_downloaded_ft2]

    # Print them out
    print("Query parameters:")

    for k, v in query_parameters.items():
        print("%30s = %s" % (k, v))

    # POST encoding (bytes, as required by the Python 3 urllib.request API)
    postData = urllib.parse.urlencode(query_parameters).encode('utf-8')
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present (best effort: a missing file is fine)
    try:
        os.remove(temporaryFileName)
    except OSError:
        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:
        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)
    except socket.timeout:
        raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)
    except Exception as e:
        print(e)
        raise RuntimeError("Problems with the download. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:
        html = " ".join(htmlFile).strip()

    os.remove(temporaryFileName)

    # Extract data from the response
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    # Fall back to the alternative page layout if the first div was not found
    if parser.data == []:
        parser = DivParser("right-side")
        parser.feed(html)

    try:
        # Get line containing the time estimation
        estimatedTimeLine = [x for x in parser.data
                             if x.find("The estimated time for your query to complete is") == 0][0]

        # Get the time estimate
        estimatedTimeForTheQuery = re.findall('The estimated time for your query to complete is ([0-9]+) seconds',
                                              estimatedTimeLine)[0]
    except IndexError:
        # The expected sentence is not in the page: the server returned an empty or error page
        raise RuntimeError("Problems with the download. Empty or wrong answer from the LAT server. "
                           "Please retry later.")
    else:
        print("\nEstimated complete time for your query: %s seconds" % estimatedTimeForTheQuery)

    http_address = [x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0][0]

    print("\nIf this download fails, you can find your data at %s (when ready)\n" % http_address)

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # Seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # Seconds

    # precompile Url regular expression (dot escaped so only ".fits" matches literally)
    regexpr = re.compile(r"wget (.*\.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:
        # Try and fetch the html with the results
        try:
            _ = urllib.request.urlretrieve(http_address, fakeName, )
        except socket.timeout:
            urllib.request.urlcleanup()
            raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that "
                               "you can access %s, then retry" % threeML_config['LAT']['query form'])
        except Exception as e:
            print(e)
            urllib.request.urlcleanup()
            raise RuntimeError("Problems with the download. Check your connection or that you can access "
                               "%s, then retry." % threeML_config['LAT']['query form'])

        with open(fakeName) as f:
            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == '2':
            # Success! Get the download link
            links = regexpr.findall(html)

            # Remove temp file
            os.remove(fakeName)

            # we're done
            break
        else:
            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config['LAT']['public HTTP location']

    if links is not None:
        filenames = [x.split('/')[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)
        downloaded_files = [downloader.download(filename, destination_directory) for filename in filenames]
    else:
        raise RuntimeError("Could not download LAT Standard data")

    # Now we need to sort so that the FT1 is always first (they might be out of order)
    # If FT2 is first, switch them, otherwise do nothing
    if re.match(r'.+SC[0-9][0-9]\.fits', downloaded_files[0]) is not None:
        # The FT2 is first, flip them
        downloaded_files = downloaded_files[::-1]

    # Finally, open the FITS file and write the unique key for this query, so that the download will not be
    # repeated if not necessary
    for fits_file in downloaded_files:
        with pyfits.open(fits_file, mode='update') as f:
            f[0].header.set(_uid_fits_keyword, query_unique_id)

    return downloaded_files
def download_LAT_data(ra: float,
                      dec: float,
                      radius: float,
                      tstart: float,
                      tstop: float,
                      time_type: str,
                      data_type: str = "Photon",
                      destination_directory: str = ".",
                      Emin: float = 30.,
                      Emax: float = 1000000.) -> tuple:
    """
    Download data from the public LAT data server (of course you need a working internet
    connection). Data are selected in a circular Region of Interest (cone) centered on
    the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than
        what you will need in the analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner
        classes, Extended otherwise. Default is Photon.
    :param destination_directory: directory where you want to save the data
        (default: current directory)
    :param Emin: minimum photon energy (in MeV) to download (default: 30 MeV, must be
        between 30 and 1e6 MeV)
    :param Emax: maximum photon energy (in MeV) to download (default: 1e6 MeV, must be
        between 30 and 1e6 MeV)
    :return: a (FT1, FT2) tuple: the path to the merged FT1 (event) file and the path to
        the FT2 (spacecraft) file
    :raises TimeTypeNotKnown: if time_type is not one of MET, Gregorian, MJD
    :raises TypeError: if data_type is not Photon or Extended
    :raises ValueError: if the ROI or the energy selection is invalid
    :raises RuntimeError: if the communication with the LAT server fails
    """

    # ---- Validate inputs ----------------------------------------------------

    _known_time_types = ["MET", "Gregorian", "MJD"]

    if time_type not in _known_time_types:
        out = ",".join(_known_time_types)
        log.error(f"Time type must be one of {out}")
        raise TimeTypeNotKnown()

    valid_classes = ["Photon", "Extended"]

    if data_type not in valid_classes:
        out = ",".join(valid_classes)
        log.error(f"Data type must be one of {out}")
        raise TypeError()

    if radius <= 0:
        log.error("Radius of the Region of Interest must be > 0")
        raise ValueError()

    if not (0 <= ra <= 360.0):
        log.error("R.A. must be 0 <= ra <= 360")
        raise ValueError()

    if not -90 <= dec <= 90:
        log.error("Dec. must be -90 <= dec <= 90")
        raise ValueError()

    # Clip the requested energy range to the range actually served by the LAT data server
    fermiEmin = 30
    fermiEmax = 1e6

    if Emin < fermiEmin:
        log.warning(
            f"Setting Emin from {Emin} to 30 MeV (minimum available energy for Fermi-LAT data)"
        )
        Emin = fermiEmin

    if Emin > fermiEmax:
        log.warning(
            f"Setting Emin from {Emin} to 1 TeV (maximum available energy for Fermi-LAT data)"
        )
        Emin = fermiEmax

    if Emax < fermiEmin:
        log.warning(
            f"Setting Emax from {Emax} to 30 MeV (minimum available energy for Fermi-LAT data)"
        )
        Emax = fermiEmin

    if Emax > fermiEmax:
        log.warning(
            f"Setting Emax from {Emax} to 1 TeV (maximum available energy for Fermi-LAT data)"
        )
        Emax = fermiEmax

    if Emin >= Emax:
        log.error(
            f"Minimum energy ({Emin}) must be less than maximum energy ({Emax}) for download."
        )
        raise ValueError()

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory,
                                              abspath=True)

    if not destination_directory.exists():
        destination_directory.mkdir()

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url: str = threeML_config.LAT.query_form

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters["coordfield"] = "%.4f,%.4f" % (ra, dec)
    query_parameters["coordsystem"] = "J2000"
    query_parameters["shapefield"] = "%s" % radius
    query_parameters["timefield"] = "%s,%s" % (tstart, tstop)
    query_parameters["timetype"] = "%s" % time_type
    query_parameters["energyfield"] = "%.3f,%.3f" % (Emin, Emax)
    query_parameters["photonOrExtendedOrNone"] = data_type
    query_parameters["destination"] = "query"
    query_parameters["spacecraft"] = "checked"

    # Print them out
    log.info("Query parameters:")

    for k, v in query_parameters.items():
        log.info("%30s = %s" % (k, v))

    # Compute a unique ID for this query; it is stamped into the headers of the
    # downloaded FITS files so an identical query can reuse them later
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))
    log.info("Query ID: %s" % query_unique_id)

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = [x for x in destination_directory.glob("*PH??.fits")]
    ft2s = [x for x in destination_directory.glob("*SC??.fits")]

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1s = []
    prev_downloaded_ft2 = None

    for ft1 in ft1s:
        with pyfits.open(ft1) as f:
            this_query_uid = f[0].header.get(_uid_fits_keyword)

        if this_query_uid == query_unique_id:
            # Found one! Append to the list as there might be others
            prev_downloaded_ft1s.append(ft1)

    if len(prev_downloaded_ft1s) > 0:
        for ft2 in ft2s:
            with pyfits.open(ft2) as f:
                this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:
                # Found it (FT2 is a single file)
                prev_downloaded_ft2 = ft2
                break
    # (if there is no matching FT1 file there shouldn't be any FT2 file either,
    # so no need to look any further)

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if len(prev_downloaded_ft1s) > 0 and prev_downloaded_ft2 is not None:
        log.warning(
            f"Existing event file {prev_downloaded_ft1s} and Spacecraft file {prev_downloaded_ft2} correspond to the same selection. "
            "We assume you did not tamper with them, so we will return those instead of downloading them again. "
            "If you want to download them again, remove them from the outdir")

        return (
            merge_LAT_data(prev_downloaded_ft1s,
                           destination_directory,
                           outfile="L%s_FT1.fits" % query_unique_id,
                           Emin=Emin,
                           Emax=Emax),
            prev_downloaded_ft2,
        )

    # POST encoding
    postData = urllib.parse.urlencode(query_parameters).encode("utf-8")
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present
    # FIX: was a bare except; only a missing/inaccessible file is expected here
    try:
        os.remove(temporaryFileName)
    except OSError:
        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:
        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0,
                                   postData)
    except socket.timeout:
        log.error(
            "Time out when connecting to the server. Check your internet connection, or that the "
            f"form at {url} is accessible, then retry")
        raise RuntimeError()
    except Exception as e:
        log.error(e)
        log.exception(
            "Problems with the download. Check your internet connection, or that the "
            f"form at {url} is accessible, then retry")
        raise RuntimeError()

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:
        html = " ".join(htmlFile.readlines()).strip()

    os.remove(temporaryFileName)

    # Extract data from the response; the query id and time estimate live inside a
    # div whose class changed over time, hence the two attempts
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    if parser.data == []:
        parser = DivParser("right-side")
        parser.feed(html)

    try:
        # Get line containing the time estimation
        estimatedTimeLine = [
            x for x in parser.data
            if x.find("The estimated time for your query to complete is") == 0
        ][0]

        # Get the time estimate
        estimated_time_for_the_query = re.findall(
            "The estimated time for your query to complete is ([0-9]+) seconds",
            estimatedTimeLine,
        )[0]

    except IndexError:
        # FIX: was a bare except; the only expected failure is the [0] lookup on an
        # empty match list (i.e. the server answered with an unexpected page)
        raise RuntimeError(
            "Problems with the download. Empty or wrong answer from the LAT server. "
            "Please retry later.")

    else:
        log.info(
            f"Estimated complete time for your query: {estimated_time_for_the_query} seconds"
        )

    http_address = [
        x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0
    ][0]

    log.info(
        f"If this download fails, you can find your data at {http_address} (when ready)"
    )

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimated_time_for_the_query)),
                  120)  # Seconds
    refreshTime = min(float(estimated_time_for_the_query) / 2.0,
                      5.0)  # Seconds

    # precompile Url regular expression
    # FIX: raw string, and the '.' before 'fits' was unescaped (matched any character)
    regexpr = re.compile(r"wget (.*\.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:

        # Try and fetch the html with the results
        try:
            _ = urllib.request.urlretrieve(http_address, fakeName)
        except socket.timeout:
            urllib.request.urlcleanup()
            log.exception(
                "Time out when connecting to the server. Check your internet connection, or that "
                f"you can access {threeML_config.LAT.query_form}, then retry")
            raise RuntimeError()
        except Exception as e:
            log.error(e)
            urllib.request.urlcleanup()
            log.exception(
                "Problems with the download. Check your connection or that you can access "
                f"{threeML_config.LAT.query_form}, then retry.")
            raise RuntimeError()

        with open(fakeName) as f:
            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == "2":
            # Success! Get the download links
            links = regexpr.findall(html)

            # Remove temp file, we're done
            os.remove(fakeName)
            break
        else:
            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config.LAT.public_http_location

    # FIX: identity comparison with None instead of '!= None'
    if links is not None:
        filenames = [x.split("/")[-1] for x in links]

        log.info("Downloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)
        downloaded_files = [
            downloader.download(filename, destination_directory)
            for filename in filenames
        ]
    else:
        # The server never reported status 2 within the timeout
        log.error("Could not download LAT Standard data")
        raise RuntimeError()

    # Separate the FT1 (event) and FT2 (spacecraft) files: they might arrive in any order
    FT1 = []
    FT2 = None

    for fits_file in downloaded_files:
        # Stamp the unique key for this query into the header, so that the download
        # will not be repeated if not necessary
        with pyfits.open(fits_file, mode="update") as f:
            f[0].header.set(_uid_fits_keyword, query_unique_id)

        # FIX: raw string with escaped '.' before 'fits'
        if re.match(r".+SC[0-9][0-9]\.fits", str(fits_file)) is not None:
            FT2 = fits_file
        else:
            FT1.append(fits_file)

    return (merge_LAT_data(FT1,
                           destination_directory,
                           outfile="L%s_FT1.fits" % query_unique_id,
                           Emin=Emin,
                           Emax=Emax), FT2)
def download_GBM_trigger_data(trigger_name: str,
                              detectors: Optional[List[str]] = None,
                              destination_directory: str = ".",
                              compress_tte: bool = True) -> Dict[str, Any]:
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your
    destination directory, they will be skipped in the download process. The output
    dictionary can be used as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'],
    destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download
    :raises DetDoesNotExist: if a detector in the provided list is not a valid detector
    :raises TriggerDoesNotExist: if the trigger is not present on the server
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_: str = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory: Path = sanitize_filename(destination_directory,
                                                    abspath=True)

    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any)
    if detectors is not None:
        for det in detectors:
            if det not in _detector_list:
                log.error(
                    f"Detector {det} in the provided list is not a valid detector. "
                    f"Valid choices are: {_detector_list}")
                raise DetDoesNotExist()
    else:
        # No selection provided: download all detectors
        detectors: List[str] = list(_detector_list)

    # Open heasarc web page
    url = threeML_config.GBM.public_http_location
    year = f"20{sanitized_trigger_name_[:2]}"
    directory = f"/triggers/{year}/bn{sanitized_trigger_name_}/current"
    heasarc_web_page_url = f"{url}/{directory}"

    log.debug(f"going to look in {heasarc_web_page_url}")

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        log.exception(
            f"Trigger {sanitized_trigger_name_} does not exist at {heasarc_web_page_url}"
        )
        raise TriggerDoesNotExist()

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:
            # Not a data file
            continue

        # FIX: dropped the Python-2 era list(map(str, ...)) wrapper; the tokens are
        # already plain str in Python 3
        _, file_type, detname, _, version_ext = tokens

        # FIX: the version token is not used, make that explicit
        _version, ext = version_ext.split(".")

        # We do not care here about the other files (tcat, bcat and so on),
        # nor about files which pertain to other detectors
        if (file_type not in ["cspec", "tte"]
                or ext not in ["rsp", "rsp2", "pha", "fit"]
                or detname not in detectors):
            continue

        # cspec files can be rsp, rsp2 or pha files. Classify them
        if file_type == "cspec":
            if ext == "rsp":
                remote_files_info[detname]["rsp"] = this_file
            elif ext == "rsp2":
                remote_files_info[detname]["rsp2"] = this_file
            elif ext == "pha":
                remote_files_info[detname]["cspec"] = this_file
            else:
                raise RuntimeError("Should never get here")
        else:
            remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint())
                                         for det in detectors])

    for detector in list(remote_files_info.keys()):

        log.debug(f"trying to download GBM detector {detector}")

        remote_detector_info = remote_files_info[detector]
        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info["cspec"] = downloader.download(
            remote_detector_info["cspec"], destination_directory,
            progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        # (either way it is stored under the local 'rsp' key)
        if "rsp2" in remote_detector_info:
            log.debug(f"{detector} has RSP2 responses")
            local_detector_info["rsp"] = downloader.download(
                remote_detector_info["rsp2"],
                destination_directory,
                progress=True)
        else:
            log.debug(f"{detector} has RSP responses")
            local_detector_info["rsp"] = downloader.download(
                remote_detector_info["rsp"],
                destination_directory,
                progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info["tte"] = downloader.download(
            remote_detector_info["tte"],
            destination_directory,
            progress=True,
            compress=compress_tte,
        )

    return download_info
def download_LLE_trigger_data(trigger_name: str,
                              destination_directory: str = '.') -> DictWithPrettyPrint:
    """
    Download the latest Fermi LAT LLE and RSP files from the HEASARC server. Will get
    the latest file versions. If the files already exist in your destination directory,
    they will be skipped in the download process. The output dictionary can be used as
    input to the FermiLATLLELike class.

    example usage: download_LLE_trigger_data('080916009', destination_directory='.')

    :param trigger_name: trigger number (str) with no leading letter e.g. '080916009'
    :param destination_directory: download directory
    :return: a dictionary with information about the download
    :raises TriggerDoesNotExist: if the trigger is not present on the server
    """

    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory,
                                              abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Figure out the directory on the server
    # FIX: use the attribute-style config access used everywhere else in this module
    # (threeML_config.LAT.public_http_location) instead of the legacy dict-style key
    url = threeML_config.LAT.public_http_location
    year = f"20{sanitized_trigger_name_[:2]}"
    directory = f"triggers/{year}/bn{sanitized_trigger_name_}/current"

    heasarc_web_page_url = f"{url}/{directory}"

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        # Log before raising, consistent with download_GBM_trigger_data
        log.exception(
            f"Trigger {sanitized_trigger_name_} does not exist at {heasarc_web_page_url}"
        )
        raise TriggerDoesNotExist()

    # Download only the lle, pt, cspec and rsp file (i.e., do not get all the png, pdf and so on)
    # FIX: raw string; the previous non-raw pattern contained the invalid escape '\.'
    pattern = r'gll_(lle|pt|cspec)_bn.+\.(fit|rsp|pha)'

    # destination_directory was already sanitized above; no need to do it twice
    downloaded_files = downloader.download_all_files(destination_directory,
                                                     progress=True,
                                                     pattern=pattern)

    # Put the files in a structured dictionary
    download_info = DictWithPrettyPrint()

    for download in downloaded_files:

        file_type = _file_type_match.match(
            os.path.basename(download)).group(1)

        if file_type == 'cspec':
            # a cspec file can be 2 things: a CSPEC spectral set (with .pha extension),
            # or a response matrix (with a .rsp extension)
            ext = os.path.splitext(os.path.basename(download))[1]

            if ext == '.rsp':
                file_type = 'rsp'
            elif ext == '.pha':
                file_type = 'cspec'
            else:
                raise RuntimeError("Should never get here")

        # The pt file is really an ft2 file
        if file_type == 'pt':
            file_type = 'ft2'

        download_info[file_type] = download

    return download_info