def download_LAT_data(ra, dec, radius, tstart, tstop, time_type, data_type='Photon', destination_directory="."):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than what you will need in the
    analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
    Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the path to the downloaded FT1 and FT2 file
    """
    _known_time_types = ['MET', 'Gregorian', 'MJD']

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(_known_time_types)

    valid_classes = ['Photon', 'Extended']
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(valid_classes)

    assert radius > 0, "Radius of the Region of Interest must be > 0"
    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config['LAT']['query form']

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters['coordfield'] = "%.4f,%.4f" % (ra, dec)
    query_parameters['coordsystem'] = "J2000"
    query_parameters['shapefield'] = "%s" % radius
    query_parameters['timefield'] = "%s,%s" % (tstart, tstop)
    query_parameters['timetype'] = "%s" % time_type
    query_parameters['energyfield'] = "30,1000000"  # Download everything, we will chose later
    query_parameters['photonOrExtendedOrNone'] = data_type
    query_parameters['destination'] = 'query'
    query_parameters['spacecraft'] = 'checked'

    # Compute a unique ID for this query (deterministic, so the same selection always
    # maps to the same tag and previously-downloaded files can be recognized)
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1 = None
    prev_downloaded_ft2 = None

    for ft1 in ft1s:
        with pyfits.open(ft1) as f:
            this_query_uid = f[0].header.get(_uid_fits_keyword)

        if this_query_uid == query_unique_id:
            # Found one!
            prev_downloaded_ft1 = ft1
            break

    # Only look for a matching FT2 if an FT1 was found: if there is no FT1 file
    # there shouldn't be any FT2 file either
    if prev_downloaded_ft1 is not None:
        for ft2 in ft2s:
            with pyfits.open(ft2) as f:
                this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:
                # Found one!
                prev_downloaded_ft2 = ft2
                break

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if prev_downloaded_ft1 is not None and prev_downloaded_ft2 is not None:
        print("Existing event file %s and Spacecraft file %s correspond to the same selection. "
              "We assume you did not tamper with them, so we will return those instead of downloading them again. "
              "If you want to download them again, remove them from the outdir" %
              (prev_downloaded_ft1, prev_downloaded_ft2))

        return [prev_downloaded_ft1, prev_downloaded_ft2]

    # Print them out
    print("Query parameters:")

    for k, v in query_parameters.items():
        print("%30s = %s" % (k, v))

    # POST encoding
    postData = urllib.parse.urlencode(query_parameters).encode('utf-8')
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present (best effort: a missing file is fine)
    try:
        os.remove(temporaryFileName)
    except OSError:
        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:
        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)
    except socket.timeout:
        raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)
    except Exception as e:
        print(e)
        raise RuntimeError("Problems with the download. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:
        html = " ".join(htmlFile).strip()

    os.remove(temporaryFileName)

    # Extract data from the response
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    # Fall back to the alternative page layout if the first div was not found
    if parser.data == []:
        parser = DivParser("right-side")
        parser.feed(html)

    try:
        # Get line containing the time estimation
        estimatedTimeLine = [x for x in parser.data
                             if x.find("The estimated time for your query to complete is") == 0][0]

        # Get the time estimate
        estimatedTimeForTheQuery = re.findall('The estimated time for your query to complete is ([0-9]+) seconds',
                                              estimatedTimeLine)[0]
    except IndexError:
        # The expected sentence is not in the page: the server returned an empty or error page
        raise RuntimeError("Problems with the download. Empty or wrong answer from the LAT server. "
                           "Please retry later.")
    else:
        print("\nEstimated complete time for your query: %s seconds" % estimatedTimeForTheQuery)

    http_address = [x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0][0]

    print("\nIf this download fails, you can find your data at %s (when ready)\n" % http_address)

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # Seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # Seconds

    # precompile Url regular expression (dot escaped so only ".fits" matches literally)
    regexpr = re.compile(r"wget (.*\.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:
        # Try and fetch the html with the results
        try:
            _ = urllib.request.urlretrieve(http_address, fakeName, )
        except socket.timeout:
            urllib.request.urlcleanup()
            raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that "
                               "you can access %s, then retry" % threeML_config['LAT']['query form'])
        except Exception as e:
            print(e)
            urllib.request.urlcleanup()
            raise RuntimeError("Problems with the download. Check your connection or that you can access "
                               "%s, then retry." % threeML_config['LAT']['query form'])

        with open(fakeName) as f:
            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == '2':
            # Success! Get the download link
            links = regexpr.findall(html)

            # Remove temp file
            os.remove(fakeName)

            # we're done
            break
        else:
            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config['LAT']['public HTTP location']

    if links is not None:
        filenames = [x.split('/')[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)
        downloaded_files = [downloader.download(filename, destination_directory) for filename in filenames]
    else:
        raise RuntimeError("Could not download LAT Standard data")

    # Now we need to sort so that the FT1 is always first (they might be out of order)
    # If FT2 is first, switch them, otherwise do nothing
    if re.match(r'.+SC[0-9][0-9]\.fits', downloaded_files[0]) is not None:
        # The FT2 is first, flip them
        downloaded_files = downloaded_files[::-1]

    # Finally, open the FITS file and write the unique key for this query, so that the download will not be
    # repeated if not necessary
    for fits_file in downloaded_files:
        with pyfits.open(fits_file, mode='update') as f:
            f[0].header.set(_uid_fits_keyword, query_unique_id)

    return downloaded_files
def download_LLE_trigger_data(trigger_name, destination_directory='.'):
    """
    Download the latest Fermi LAT LLE and RSP files from the HEASARC server. Will get the
    latest file versions. If the files already exist in your destination directory, they will be skipped
    in the download process. The output dictionary can be used as input to the FermiLATLLELike class.

    example usage: download_LLE_trigger_data('080916009', destination_directory='.')

    :param trigger_name: trigger number (str) with no leading letter e.g. '080916009'
    :param destination_directory: download directory
    :return: a dictionary with information about the download
    """

    # Normalize whatever the user passed into the canonical trigger string
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # Make sure the output directory exists, creating it if necessary
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Build the URL of the trigger directory on the server:
    # <public HTTP location>/triggers/<year>/bn<trigger>/current
    base_url = threeML_config['LAT']['public HTTP location']
    trigger_year = '20%s' % sanitized_trigger_name_[:2]
    server_directory = 'triggers/%s/bn%s/current' % (trigger_year, sanitized_trigger_name_)
    heasarc_web_page_url = '%s/%s' % (base_url, server_directory)

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        raise TriggerDoesNotExist("Trigger %s does not exist at %s" %
                                  (sanitized_trigger_name_, heasarc_web_page_url))

    # Download only the lle, pt, cspec and rsp file (i.e., do not get all the png, pdf and so on)
    pattern = 'gll_(lle|pt|cspec)_bn.+\.(fit|rsp|pha)'

    destination_directory_sanitized = sanitize_filename(destination_directory)

    downloaded_files = downloader.download_all_files(destination_directory_sanitized,
                                                     progress=True,
                                                     pattern=pattern)

    # Sort the downloaded files into a structured dictionary keyed by file type
    download_info = DictWithPrettyPrint()

    for local_path in downloaded_files:

        basename = os.path.basename(local_path)
        file_type = _file_type_match.match(basename).group(1)

        if file_type == 'cspec':

            # a cspec file can be 2 things: a CSPEC spectral set (with .pha) extension,
            # or a response matrix (with a .rsp extension)
            extension = os.path.splitext(basename)[1]

            if extension == '.rsp':
                file_type = 'rsp'
            elif extension == '.pha':
                file_type = 'cspec'
            else:
                raise RuntimeError("Should never get here")

        elif file_type == 'pt':
            # The pt file is really an ft2 file
            file_type = 'ft2'

        download_info[file_type] = local_path

    return download_info
def download_GBM_trigger_data(trigger_name, detectors=None, destination_directory='.', compress_tte=True):
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'], destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any); with no list, default to every detector
    if detectors is not None:

        for det in detectors:

            assert det in _detector_list, "Detector %s in the provided list is not a valid detector. " \
                                          "Valid choices are: %s" % (det, _detector_list)

    else:

        detectors = list(_detector_list)

    # Open heasarc web page: the trigger directory lives at
    # <public HTTP location>/triggers/<year>/bn<trigger>/current
    url = threeML_config['gbm']['public HTTP location']
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = '/triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:

        downloader = ApacheDirectory(heasarc_web_page_url)

    except RemoteDirectoryNotFound:

        raise TriggerDoesNotExist("Trigger %s does not exist at %s" % (sanitized_trigger_name_,
                                                                       heasarc_web_page_url))

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    # (one sub-dict per detector, keyed 'cspec'/'rsp'/'rsp2'/'tte')
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:

            # Not a data file
            continue

        else:

            # The "map" is necessary to transform the tokens to normal string (instead of unicode),
            # because u"b0" != "b0" as a key for a dictionary
            _, file_type, detname, _, version_ext = map(str, tokens)

            version, ext = version_ext.split(".")

            # We do not care here about the other files (tcat, bcat and so on),
            # nor about files which pertain to other detectors
            if file_type not in ['cspec', 'tte'] or ext not in ['rsp','rsp2','pha','fit'] or detname not in detectors:

                continue

            # cspec files can be rsp, rsp2 or pha files. Classify them
            if file_type == 'cspec':

                if ext == 'rsp':

                    remote_files_info[detname]['rsp'] = this_file

                elif ext == 'rsp2':

                    remote_files_info[detname]['rsp2'] = this_file

                elif ext == 'pha':

                    # The pha extension marks the actual CSPEC spectral set
                    remote_files_info[detname]['cspec'] = this_file

                else:

                    raise RuntimeError("Should never get here")

            else:

                remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint()) for det in detectors])

    for detector in remote_files_info.keys():

        remote_detector_info = remote_files_info[detector]
        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info['cspec'] = downloader.download(remote_detector_info['cspec'], destination_directory,
                                                           progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        # NOTE(review): if neither was found on the server this raises KeyError — presumably
        # every trigger directory always provides at least an rsp; confirm against the server layout
        if 'rsp2' in remote_detector_info:

            local_detector_info['rsp'] = downloader.download(remote_detector_info['rsp2'], destination_directory,
                                                             progress=True)

        else:

            local_detector_info['rsp'] = downloader.download(remote_detector_info['rsp'], destination_directory,
                                                             progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info['tte'] = downloader.download(remote_detector_info['tte'], destination_directory,
                                                         progress=True, compress=compress_tte)

    return download_info
def download_GBM_trigger_data(trigger_name, detectors=None, destination_directory='.', compress_tte=True):
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your destination
    directory, they will be skipped in the download process. The output dictionary can be used
    as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'], destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any); with no list, default to every detector
    if detectors is not None:
        for det in detectors:
            assert det in _detector_list, "Detector %s in the provided list is not a valid detector. " \
                                          "Valid choices are: %s" % (det, _detector_list)
    else:
        detectors = list(_detector_list)

    # Open heasarc web page: the trigger directory lives at
    # <public HTTP location>/triggers/<year>/bn<trigger>/current
    url = threeML_config['gbm']['public HTTP location']
    year = '20%s' % sanitized_trigger_name_[:2]
    directory = '/triggers/%s/bn%s/current' % (year, sanitized_trigger_name_)

    heasarc_web_page_url = '%s/%s' % (url, directory)

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        raise TriggerDoesNotExist("Trigger %s does not exist at %s" %
                                  (sanitized_trigger_name_, heasarc_web_page_url))

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    # (one sub-dict per detector, keyed 'cspec'/'rsp'/'rsp2'/'tte')
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # How a 'cspec' file is classified depending on its extension
    # (the .pha extension marks the actual CSPEC spectral set)
    cspec_type_for_ext = {'rsp': 'rsp', 'rsp2': 'rsp2', 'pha': 'cspec'}

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:
            # Not a data file
            continue

        _, file_type, detname, _, version_ext = tokens
        _version, ext = version_ext.split(".")

        # We do not care here about the other files (tcat, bcat and so on),
        # nor about files which pertain to other detectors
        if file_type not in ('cspec', 'tte') or ext not in ('rsp', 'rsp2', 'pha', 'fit') \
                or detname not in detectors:
            continue

        if file_type == 'cspec':

            # cspec files can be rsp, rsp2 or pha files. Classify them
            if ext not in cspec_type_for_ext:
                raise RuntimeError("Should never get here")

            remote_files_info[detname][cspec_type_for_ext[ext]] = this_file

        else:

            remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint())
                                         for det in detectors])

    for detector, remote_detector_info in remote_files_info.items():

        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info['cspec'] = downloader.download(remote_detector_info['cspec'],
                                                           destination_directory,
                                                           progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        rsp_key = 'rsp2' if 'rsp2' in remote_detector_info else 'rsp'
        local_detector_info['rsp'] = downloader.download(remote_detector_info[rsp_key],
                                                         destination_directory,
                                                         progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info['tte'] = downloader.download(remote_detector_info['tte'],
                                                         destination_directory,
                                                         progress=True,
                                                         compress=compress_tte)

    return download_info
def download_LAT_data(ra, dec, radius, tstart, tstop, time_type, data_type='Photon', destination_directory="."):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than what you will need in the
    analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
    Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the path to the downloaded FT1 and FT2 file
    """
    _known_time_types = ['MET', 'Gregorian', 'MJD']

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(_known_time_types)

    valid_classes = ['Photon', 'Extended']
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(valid_classes)

    assert radius > 0, "Radius of the Region of Interest must be > 0"
    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config['LAT']['query form']

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters['coordfield'] = "%.4f,%.4f" % (ra, dec)
    query_parameters['coordsystem'] = "J2000"
    query_parameters['shapefield'] = "%s" % radius
    query_parameters['timefield'] = "%s,%s" % (tstart, tstop)
    query_parameters['timetype'] = "%s" % time_type
    query_parameters['energyfield'] = "30,1000000"  # Download everything, we will chose later
    query_parameters['photonOrExtendedOrNone'] = data_type
    query_parameters['destination'] = 'query'
    query_parameters['spacecraft'] = 'checked'

    # Compute a unique ID for this query (deterministic, so the same selection always
    # maps to the same tag and previously-downloaded files can be recognized)
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1 = None
    prev_downloaded_ft2 = None

    for ft1 in ft1s:
        with pyfits.open(ft1) as f:
            this_query_uid = f[0].header.get(_uid_fits_keyword)

        if this_query_uid == query_unique_id:
            # Found one!
            prev_downloaded_ft1 = ft1
            break

    # Only look for a matching FT2 if an FT1 was found: if there is no FT1 file
    # there shouldn't be any FT2 file either
    if prev_downloaded_ft1 is not None:
        for ft2 in ft2s:
            with pyfits.open(ft2) as f:
                this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:
                # Found one!
                prev_downloaded_ft2 = ft2
                break

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if prev_downloaded_ft1 is not None and prev_downloaded_ft2 is not None:
        print("Existing event file %s and Spacecraft file %s correspond to the same selection. "
              "We assume you did not tamper with them, so we will return those instead of downloading them again. "
              "If you want to download them again, remove them from the outdir" %
              (prev_downloaded_ft1, prev_downloaded_ft2))

        return [prev_downloaded_ft1, prev_downloaded_ft2]

    # Print them out
    print("Query parameters:")

    for k, v in query_parameters.items():
        print("%30s = %s" % (k, v))

    # POST encoding (bytes, as required by the Python 3 urllib.request API)
    postData = urllib.parse.urlencode(query_parameters).encode('utf-8')
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present (best effort: a missing file is fine)
    try:
        os.remove(temporaryFileName)
    except OSError:
        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:
        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)
    except socket.timeout:
        raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)
    except Exception as e:
        print(e)
        raise RuntimeError("Problems with the download. Check your internet connection, or that the "
                           "form at %s is accessible, then retry" % url)

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:
        html = " ".join(htmlFile).strip()

    os.remove(temporaryFileName)

    # Extract data from the response
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    # Fall back to the alternative page layout if the first div was not found
    if parser.data == []:
        parser = DivParser("right-side")
        parser.feed(html)

    try:
        # Get line containing the time estimation
        estimatedTimeLine = [x for x in parser.data
                             if x.find("The estimated time for your query to complete is") == 0][0]

        # Get the time estimate
        estimatedTimeForTheQuery = re.findall('The estimated time for your query to complete is ([0-9]+) seconds',
                                              estimatedTimeLine)[0]
    except IndexError:
        # The expected sentence is not in the page: the server returned an empty or error page
        raise RuntimeError("Problems with the download. Empty or wrong answer from the LAT server. "
                           "Please retry later.")
    else:
        print("\nEstimated complete time for your query: %s seconds" % estimatedTimeForTheQuery)

    http_address = [x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0][0]

    print("\nIf this download fails, you can find your data at %s (when ready)\n" % http_address)

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # Seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # Seconds

    # precompile Url regular expression (dot escaped so only ".fits" matches literally)
    regexpr = re.compile(r"wget (.*\.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:
        # Try and fetch the html with the results
        try:
            _ = urllib.request.urlretrieve(http_address, fakeName, )
        except socket.timeout:
            urllib.request.urlcleanup()
            raise RuntimeError("Time out when connecting to the server. Check your internet connection, or that "
                               "you can access %s, then retry" % threeML_config['LAT']['query form'])
        except Exception as e:
            print(e)
            urllib.request.urlcleanup()
            raise RuntimeError("Problems with the download. Check your connection or that you can access "
                               "%s, then retry." % threeML_config['LAT']['query form'])

        with open(fakeName) as f:
            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == '2':
            # Success! Get the download link
            links = regexpr.findall(html)

            # Remove temp file
            os.remove(fakeName)

            # we're done
            break
        else:
            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config['LAT']['public HTTP location']

    if links is not None:
        filenames = [x.split('/')[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)
        downloaded_files = [downloader.download(filename, destination_directory) for filename in filenames]
    else:
        raise RuntimeError("Could not download LAT Standard data")

    # Now we need to sort so that the FT1 is always first (they might be out of order)
    # If FT2 is first, switch them, otherwise do nothing
    if re.match(r'.+SC[0-9][0-9]\.fits', downloaded_files[0]) is not None:
        # The FT2 is first, flip them
        downloaded_files = downloaded_files[::-1]

    # Finally, open the FITS file and write the unique key for this query, so that the download will not be
    # repeated if not necessary
    for fits_file in downloaded_files:
        with pyfits.open(fits_file, mode='update') as f:
            f[0].header.set(_uid_fits_keyword, query_unique_id)

    return downloaded_files
def download_LAT_data(ra: float,
                      dec: float,
                      radius: float,
                      tstart: float,
                      tstop: float,
                      time_type: str,
                      data_type: str = "Photon",
                      destination_directory: str = ".",
                      Emin: float = 30.,
                      Emax: float = 1000000.) -> tuple:
    """
    Download data from the public LAT data server (of course you need a working internet
    connection). Data are selected in a circular Region of Interest (cone) centered on
    the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degree) of the center of the ROI (use a larger radius than
        what you will need in the analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner
        classes, Extended otherwise. Default is Photon.
    :param destination_directory: directory where you want to save the data
        (default: current directory)
    :param Emin: minimum photon energy (in MeV) to download (default: 30 MeV, must be
        between 30 and 1e6 MeV)
    :param Emax: maximum photon energy (in MeV) to download (default: 1e6 MeV, must be
        between 30 and 1e6 MeV)
    :return: a (FT1, FT2) tuple: the path to the merged FT1 (event) file and the path to
        the FT2 (spacecraft) file
    :raises TimeTypeNotKnown: if time_type is not one of MET, Gregorian, MJD
    :raises TypeError: if data_type is not Photon or Extended
    :raises ValueError: if the ROI or the energy selection is invalid
    :raises RuntimeError: if the communication with the LAT server fails
    """

    # ---- Validate inputs ----------------------------------------------------

    _known_time_types = ["MET", "Gregorian", "MJD"]

    if time_type not in _known_time_types:
        out = ",".join(_known_time_types)
        log.error(f"Time type must be one of {out}")
        raise TimeTypeNotKnown()

    valid_classes = ["Photon", "Extended"]

    if data_type not in valid_classes:
        out = ",".join(valid_classes)
        log.error(f"Data type must be one of {out}")
        raise TypeError()

    if radius <= 0:
        log.error("Radius of the Region of Interest must be > 0")
        raise ValueError()

    if not (0 <= ra <= 360.0):
        log.error("R.A. must be 0 <= ra <= 360")
        raise ValueError()

    if not -90 <= dec <= 90:
        log.error("Dec. must be -90 <= dec <= 90")
        raise ValueError()

    # Clip the requested energy range to the range actually served by the LAT data server
    fermiEmin = 30
    fermiEmax = 1e6

    if Emin < fermiEmin:
        log.warning(
            f"Setting Emin from {Emin} to 30 MeV (minimum available energy for Fermi-LAT data)"
        )
        Emin = fermiEmin

    if Emin > fermiEmax:
        log.warning(
            f"Setting Emin from {Emin} to 1 TeV (maximum available energy for Fermi-LAT data)"
        )
        Emin = fermiEmax

    if Emax < fermiEmin:
        log.warning(
            f"Setting Emax from {Emax} to 30 MeV (minimum available energy for Fermi-LAT data)"
        )
        Emax = fermiEmin

    if Emax > fermiEmax:
        log.warning(
            f"Setting Emax from {Emax} to 1 TeV (maximum available energy for Fermi-LAT data)"
        )
        Emax = fermiEmax

    if Emin >= Emax:
        log.error(
            f"Minimum energy ({Emin}) must be less than maximum energy ({Emax}) for download."
        )
        raise ValueError()

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory,
                                              abspath=True)

    if not destination_directory.exists():
        destination_directory.mkdir()

    # This will complete automatically the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url: str = threeML_config.LAT.query_form

    # Save parameters for the query in a dictionary
    query_parameters = {}
    query_parameters["coordfield"] = "%.4f,%.4f" % (ra, dec)
    query_parameters["coordsystem"] = "J2000"
    query_parameters["shapefield"] = "%s" % radius
    query_parameters["timefield"] = "%s,%s" % (tstart, tstop)
    query_parameters["timetype"] = "%s" % time_type
    query_parameters["energyfield"] = "%.3f,%.3f" % (Emin, Emax)
    query_parameters["photonOrExtendedOrNone"] = data_type
    query_parameters["destination"] = "query"
    query_parameters["spacecraft"] = "checked"

    # Print them out
    log.info("Query parameters:")

    for k, v in query_parameters.items():
        log.info("%30s = %s" % (k, v))

    # Compute a unique ID for this query; it is stamped into the headers of the
    # downloaded FITS files so an identical query can reuse them later
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))
    log.info("Query ID: %s" % query_unique_id)

    # Look if there are FT1 and FT2 files in the output directory matching this unique ID
    ft1s = [x for x in destination_directory.glob("*PH??.fits")]
    ft2s = [x for x in destination_directory.glob("*SC??.fits")]

    # Loop over all ft1s and see if there is any matching the uid
    prev_downloaded_ft1s = []
    prev_downloaded_ft2 = None

    for ft1 in ft1s:
        with pyfits.open(ft1) as f:
            this_query_uid = f[0].header.get(_uid_fits_keyword)

        if this_query_uid == query_unique_id:
            # Found one! Append to the list as there might be others
            prev_downloaded_ft1s.append(ft1)

    if len(prev_downloaded_ft1s) > 0:
        for ft2 in ft2s:
            with pyfits.open(ft2) as f:
                this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:
                # Found it (FT2 is a single file)
                prev_downloaded_ft2 = ft2
                break
    # (if there is no matching FT1 file there shouldn't be any FT2 file either,
    # so no need to look any further)

    # If we have both FT1 and FT2 matching the ID, we do not need to download anymore
    if len(prev_downloaded_ft1s) > 0 and prev_downloaded_ft2 is not None:
        log.warning(
            f"Existing event file {prev_downloaded_ft1s} and Spacecraft file {prev_downloaded_ft2} correspond to the same selection. "
            "We assume you did not tamper with them, so we will return those instead of downloading them again. "
            "If you want to download them again, remove them from the outdir")

        return (
            merge_LAT_data(prev_downloaded_ft1s,
                           destination_directory,
                           outfile="L%s_FT1.fits" % query_unique_id,
                           Emin=Emin,
                           Emax=Emax),
            prev_downloaded_ft2,
        )

    # POST encoding
    postData = urllib.parse.urlencode(query_parameters).encode("utf-8")
    temporaryFileName = "__temp_query_result.html"

    # Remove temp file if present
    # FIX: was a bare except; only a missing/inaccessible file is expected here
    try:
        os.remove(temporaryFileName)
    except OSError:
        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Get the form compiled
    try:
        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0,
                                   postData)
    except socket.timeout:
        log.error(
            "Time out when connecting to the server. Check your internet connection, or that the "
            f"form at {url} is accessible, then retry")
        raise RuntimeError()
    except Exception as e:
        log.error(e)
        log.exception(
            "Problems with the download. Check your internet connection, or that the "
            f"form at {url} is accessible, then retry")
        raise RuntimeError()

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:
        html = " ".join(htmlFile.readlines()).strip()

    os.remove(temporaryFileName)

    # Extract data from the response; the query id and time estimate live inside a
    # div whose class changed over time, hence the two attempts
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    if parser.data == []:
        parser = DivParser("right-side")
        parser.feed(html)

    try:
        # Get line containing the time estimation
        estimatedTimeLine = [
            x for x in parser.data
            if x.find("The estimated time for your query to complete is") == 0
        ][0]

        # Get the time estimate
        estimated_time_for_the_query = re.findall(
            "The estimated time for your query to complete is ([0-9]+) seconds",
            estimatedTimeLine,
        )[0]

    except IndexError:
        # FIX: was a bare except; the only expected failure is the [0] lookup on an
        # empty match list (i.e. the server answered with an unexpected page)
        raise RuntimeError(
            "Problems with the download. Empty or wrong answer from the LAT server. "
            "Please retry later.")

    else:
        log.info(
            f"Estimated complete time for your query: {estimated_time_for_the_query} seconds"
        )

    http_address = [
        x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0
    ][0]

    log.info(
        f"If this download fails, you can find your data at {http_address} (when ready)"
    )

    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimated_time_for_the_query)),
                  120)  # Seconds
    refreshTime = min(float(estimated_time_for_the_query) / 2.0,
                      5.0)  # Seconds

    # precompile Url regular expression
    # FIX: raw string, and the '.' before 'fits' was unescaped (matched any character)
    regexpr = re.compile(r"wget (.*\.fits)")

    # Now download every tot seconds the status of the query, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:

        # Try and fetch the html with the results
        try:
            _ = urllib.request.urlretrieve(http_address, fakeName)
        except socket.timeout:
            urllib.request.urlcleanup()
            log.exception(
                "Time out when connecting to the server. Check your internet connection, or that "
                f"you can access {threeML_config.LAT.query_form}, then retry")
            raise RuntimeError()
        except Exception as e:
            log.error(e)
            urllib.request.urlcleanup()
            log.exception(
                "Problems with the download. Check your connection or that you can access "
                f"{threeML_config.LAT.query_form}, then retry.")
            raise RuntimeError()

        with open(fakeName) as f:
            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == "2":
            # Success! Get the download links
            links = regexpr.findall(html)

            # Remove temp file, we're done
            os.remove(fakeName)
            break
        else:
            # Clean up and try again after a while
            os.remove(fakeName)
            urllib.request.urlcleanup()
            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config.LAT.public_http_location

    # FIX: identity comparison with None instead of '!= None'
    if links is not None:
        filenames = [x.split("/")[-1] for x in links]

        log.info("Downloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)
        downloaded_files = [
            downloader.download(filename, destination_directory)
            for filename in filenames
        ]
    else:
        # The server never reported status 2 within the timeout
        log.error("Could not download LAT Standard data")
        raise RuntimeError()

    # Separate the FT1 (event) and FT2 (spacecraft) files: they might arrive in any order
    FT1 = []
    FT2 = None

    for fits_file in downloaded_files:
        # Stamp the unique key for this query into the header, so that the download
        # will not be repeated if not necessary
        with pyfits.open(fits_file, mode="update") as f:
            f[0].header.set(_uid_fits_keyword, query_unique_id)

        # FIX: raw string with escaped '.' before 'fits'
        if re.match(r".+SC[0-9][0-9]\.fits", str(fits_file)) is not None:
            FT2 = fits_file
        else:
            FT1.append(fits_file)

    return (merge_LAT_data(FT1,
                           destination_directory,
                           outfile="L%s_FT1.fits" % query_unique_id,
                           Emin=Emin,
                           Emax=Emax), FT2)
def download_GBM_trigger_data(trigger_name: str,
                              detectors: Optional[List[str]] = None,
                              destination_directory: str = ".",
                              compress_tte: bool = True) -> Dict[str, Any]:
    """
    Download the latest GBM TTE and RSP files from the HEASARC server. Will get the
    latest file version and prefer RSP2s over RSPs. If the files already exist in your
    destination directory, they will be skipped in the download process. The output
    dictionary can be used as input to the FermiGBMTTELike class.

    example usage: download_GBM_trigger_data('080916009', detectors=['n0','na','b0'],
    destination_directory='.')

    :param trigger_name: trigger number (str) e.g. '080916009' or 'bn080916009' or 'GRB080916009'
    :param detectors: list of detectors, default is all detectors
    :param destination_directory: download directory
    :param compress_tte: compress the TTE files via gzip (default True)
    :return: a dictionary with information about the download
    :raises DetDoesNotExist: if a detector in the provided list is not a valid detector
    :raises TriggerDoesNotExist: if the trigger is not present on the server
    """

    # Let's doctor up the input just in case the user tried something strange
    sanitized_trigger_name_: str = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory: Path = sanitize_filename(destination_directory,
                                                    abspath=True)

    if_directory_not_existing_then_make(destination_directory)

    # Sanitize detector list (if any)
    if detectors is not None:
        for det in detectors:
            if det not in _detector_list:
                log.error(
                    f"Detector {det} in the provided list is not a valid detector. "
                    f"Valid choices are: {_detector_list}")
                raise DetDoesNotExist()
    else:
        # No selection provided: download all detectors
        detectors: List[str] = list(_detector_list)

    # Open heasarc web page
    url = threeML_config.GBM.public_http_location
    year = f"20{sanitized_trigger_name_[:2]}"
    directory = f"/triggers/{year}/bn{sanitized_trigger_name_}/current"
    heasarc_web_page_url = f"{url}/{directory}"

    log.debug(f"going to look in {heasarc_web_page_url}")

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        log.exception(
            f"Trigger {sanitized_trigger_name_} does not exist at {heasarc_web_page_url}"
        )
        raise TriggerDoesNotExist()

    # Now select the files we want to download, then we will download them later
    # We do it in two steps because we want to be able to choose what to download once we
    # have the complete picture

    # Get the list of remote files
    remote_file_list = downloader.files

    # This is the dictionary to keep track of the classification
    remote_files_info = DictWithPrettyPrint([(det, {}) for det in detectors])

    # Classify the files detector by detector
    for this_file in remote_file_list:

        # this_file is something like glg_tte_n9_bn100101988_v00.fit
        tokens = this_file.split("_")

        if len(tokens) != 5:
            # Not a data file
            continue

        # FIX: dropped the Python-2 era list(map(str, ...)) wrapper; the tokens are
        # already plain str in Python 3
        _, file_type, detname, _, version_ext = tokens

        # FIX: the version token is not used, make that explicit
        _version, ext = version_ext.split(".")

        # We do not care here about the other files (tcat, bcat and so on),
        # nor about files which pertain to other detectors
        if (file_type not in ["cspec", "tte"]
                or ext not in ["rsp", "rsp2", "pha", "fit"]
                or detname not in detectors):
            continue

        # cspec files can be rsp, rsp2 or pha files. Classify them
        if file_type == "cspec":
            if ext == "rsp":
                remote_files_info[detname]["rsp"] = this_file
            elif ext == "rsp2":
                remote_files_info[detname]["rsp2"] = this_file
            elif ext == "pha":
                remote_files_info[detname]["cspec"] = this_file
            else:
                raise RuntimeError("Should never get here")
        else:
            remote_files_info[detname][file_type] = this_file

    # Now download the files
    download_info = DictWithPrettyPrint([(det, DictWithPrettyPrint())
                                         for det in detectors])

    for detector in list(remote_files_info.keys()):

        log.debug(f"trying to download GBM detector {detector}")

        remote_detector_info = remote_files_info[detector]
        local_detector_info = download_info[detector]

        # Get CSPEC file
        local_detector_info["cspec"] = downloader.download(
            remote_detector_info["cspec"], destination_directory,
            progress=True)

        # Get the RSP2 file if it exists, otherwise get the RSP file
        # (either way it is stored under the local 'rsp' key)
        if "rsp2" in remote_detector_info:
            log.debug(f"{detector} has RSP2 responses")
            local_detector_info["rsp"] = downloader.download(
                remote_detector_info["rsp2"],
                destination_directory,
                progress=True)
        else:
            log.debug(f"{detector} has RSP responses")
            local_detector_info["rsp"] = downloader.download(
                remote_detector_info["rsp"],
                destination_directory,
                progress=True)

        # Get TTE file (compressing it if requested)
        local_detector_info["tte"] = downloader.download(
            remote_detector_info["tte"],
            destination_directory,
            progress=True,
            compress=compress_tte,
        )

    return download_info
def download_LLE_trigger_data(trigger_name: str,
                              destination_directory: str = '.') -> DictWithPrettyPrint:
    """
    Download the latest Fermi LAT LLE and RSP files from the HEASARC server. Will get
    the latest file versions. If the files already exist in your destination directory,
    they will be skipped in the download process. The output dictionary can be used as
    input to the FermiLATLLELike class.

    example usage: download_LLE_trigger_data('080916009', destination_directory='.')

    :param trigger_name: trigger number (str) with no leading letter e.g. '080916009'
    :param destination_directory: download directory
    :return: a dictionary with information about the download
    :raises TriggerDoesNotExist: if the trigger is not present on the server
    """

    sanitized_trigger_name_ = _validate_fermi_trigger_name(trigger_name)

    # create output directory if it does not exists
    destination_directory = sanitize_filename(destination_directory,
                                              abspath=True)
    if_directory_not_existing_then_make(destination_directory)

    # Figure out the directory on the server
    # FIX: use the attribute-style config access used everywhere else in this module
    # (threeML_config.LAT.public_http_location) instead of the legacy dict-style key
    url = threeML_config.LAT.public_http_location
    year = f"20{sanitized_trigger_name_[:2]}"
    directory = f"triggers/{year}/bn{sanitized_trigger_name_}/current"

    heasarc_web_page_url = f"{url}/{directory}"

    try:
        downloader = ApacheDirectory(heasarc_web_page_url)
    except RemoteDirectoryNotFound:
        # Log before raising, consistent with download_GBM_trigger_data
        log.exception(
            f"Trigger {sanitized_trigger_name_} does not exist at {heasarc_web_page_url}"
        )
        raise TriggerDoesNotExist()

    # Download only the lle, pt, cspec and rsp file (i.e., do not get all the png, pdf and so on)
    # FIX: raw string; the previous non-raw pattern contained the invalid escape '\.'
    pattern = r'gll_(lle|pt|cspec)_bn.+\.(fit|rsp|pha)'

    # destination_directory was already sanitized above; no need to do it twice
    downloaded_files = downloader.download_all_files(destination_directory,
                                                     progress=True,
                                                     pattern=pattern)

    # Put the files in a structured dictionary
    download_info = DictWithPrettyPrint()

    for download in downloaded_files:

        file_type = _file_type_match.match(
            os.path.basename(download)).group(1)

        if file_type == 'cspec':
            # a cspec file can be 2 things: a CSPEC spectral set (with .pha extension),
            # or a response matrix (with a .rsp extension)
            ext = os.path.splitext(os.path.basename(download))[1]

            if ext == '.rsp':
                file_type = 'rsp'
            elif ext == '.pha':
                file_type = 'cspec'
            else:
                raise RuntimeError("Should never get here")

        # The pt file is really an ft2 file
        if file_type == 'pt':
            file_type = 'ft2'

        download_info[file_type] = download

    return download_info