def fetch_wmap_temperatures(masked=False,
                            data_home=None,
                            download_if_missing=True):
    """Loader for WMAP temperature map data

    Parameters
    ----------
    masked : optional, default=False
        If True, then return the foreground-masked healpix array of data
        If False, then return the raw temperature array
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : np.ndarray or np.ma.MaskedArray
        array containing the (masked) HEALPix temperature data
    """
    import healpy as hp

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))
    mask_file = os.path.join(data_home, os.path.basename(MASK_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        data_buffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(data_buffer)

    data = hp.read_map(data_file)

    if masked:
        if not os.path.exists(mask_file):
            if not download_if_missing:
                raise IOError('mask data not present on disk. '
                              'set download_if_missing=True to download')
            mask_buffer = download_with_progress_bar(MASK_URL)
            open(mask_file, 'wb').write(mask_buffer)

        mask = hp.read_map(mask_file)

        data = hp.ma(data)
        data.mask = np.logical_not(mask)  # WMAP mask has 0=bad. We need 1=bad

    return data
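A minimal usage sketch for the loader above; it assumes healpy is installed and that the first call may download the map and mask into the data_home cache:

import numpy as np
from astroML.datasets import fetch_wmap_temperatures

wmap_masked = fetch_wmap_temperatures(masked=True)
print(wmap_masked.shape)                 # one value per HEALPix pixel
print(np.std(wmap_masked.compressed()))  # scatter of the unmasked pixels only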
Example #2
def fetch_wmap_temperatures(masked=False, data_home=None,
                            download_if_missing=True):
    """Loader for WMAP temperature map data

    Parameters
    ----------
    masked : optional, default=False
        If True, then return the foreground-masked healpix array of data
        If False, then return the raw temperature array
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : np.ndarray or np.ma.MaskedArray
        array containing the (masked) HEALPix temperature data
    """
    import healpy as hp

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))
    mask_file = os.path.join(data_home, os.path.basename(MASK_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        data_buffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(data_buffer)

    data = hp.read_map(data_file)

    if masked:
        if not os.path.exists(mask_file):
            if not download_if_missing:
                raise IOError('mask data not present on disk. '
                              'set download_if_missing=True to download')
            mask_buffer = download_with_progress_bar(MASK_URL)
            open(mask_file, 'wb').write(mask_buffer)

        mask = hp.read_map(mask_file)

        data = hp.ma(data)
        data.mask = np.logical_not(mask)  # WMAP mask has 0=bad. We need 1=bad

    return data
Example #3
def fetch_LIGO_bigdog(data_home=None, download_if_missing=True):
    """Loader for LIGO bigdog event

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : record array
        The data is 10 seconds of measurements from three sites, along with
        the time of each measurement.

    Examples
    --------
    >>> from astroML.datasets import fetch_LIGO_bigdog
    >>> data = fetch_LIGO_bigdog()
    >>> print data.dtype.names
    ('t', 'Hanford', 'Livingston', 'Virgo')
    >>> print data['t'][:3]
    [  0.00000000e+00   6.10400000e-05   1.22070000e-04]
    >>> print data['Hanford'][:3]
    [  1.26329846e-17   1.26846778e-17   1.19187381e-17]
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    local_file = os.path.join(data_home, LOCAL_FILE)

    if os.path.exists(local_file):
        data = np.load(local_file)

    else:
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading LIGO bigdog data from %s to %s"
               % (DATA_URL, local_file))

        buffer = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.loadtxt(buffer, skiprows=2,
                          dtype=[('t', 'f8'),
                                 ('Hanford', 'f8'),
                                 ('Livingston', 'f8'),
                                 ('Virgo', 'f8')])
        np.save(local_file, data)

    return data
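A quick sanity-check sketch built on the docstring example above: it recovers the sampling interval from the 't' column (roughly 6.1e-5 s according to the values shown there):

import numpy as np
from astroML.datasets import fetch_LIGO_bigdog

data = fetch_LIGO_bigdog()
dt = np.median(np.diff(data['t']))
print(dt, 1.0 / dt)  # time step and approximate sampling frequency in Hz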
Example #4
def fetch_LINEAR_sample(data_home=None, download_if_missing=True):
    """Loader for LINEAR data sample

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : LINEARdata object
        A custom object which provides access to 7010 selected LINEAR light
        curves.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    targetlist_file = os.path.join(data_home, os.path.basename(TARGETLIST_URL))
    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(targetlist_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        targets = download_with_progress_bar(TARGETLIST_URL)
        open(targetlist_file, 'wb').write(targets)

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    return LINEARdata(data_file, targetlist_file)
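A usage sketch for the LINEARdata object returned above. The .ids attribute and the (time, magnitude, magnitude error) column layout of each light curve are assumptions about the LINEARdata helper and should be checked against its docstring:

from astroML.datasets import fetch_LINEAR_sample

data = fetch_LINEAR_sample()
first_id = data.ids[0]           # assumed: array of LINEAR object IDs
t, mag, dmag = data[first_id].T  # assumed: per-object light-curve columns
print(first_id, len(t))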
Example #5
def fetch_LINEAR_sample(data_home=None, download_if_missing=True):
    """Loader for LINEAR data sample

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : LINEARdata object
        A custom object which provides access to 7010 selected LINEAR light
        curves.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    targetlist_file = os.path.join(data_home, os.path.basename(TARGETLIST_URL))
    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(targetlist_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        targets = download_with_progress_bar(TARGETLIST_URL)
        open(targetlist_file, 'wb').write(targets)

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    return LINEARdata(data_file, targetlist_file)
Example #6
def fetch_LIGO_bigdog(data_home=None, download_if_missing=True):
    """Loader for LIGO bigdog event

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : record array
        The data is 10 seconds of measurements from three sites, along with
        the time of each measurement.

    Examples
    --------
    >>> from astroML.datasets import fetch_LIGO_bigdog
    >>> data = fetch_LIGO_bigdog()
    >>> print data.dtype.names
    ('t', 'Hanford', 'Livingston', 'Virgo')
    >>> print data['t'][:3]
    [  0.00000000e+00   6.10400000e-05   1.22070000e-04]
    >>> print data['Hanford'][:3]
    [  1.26329846e-17   1.26846778e-17   1.19187381e-17]
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    local_file = os.path.join(data_home, LOCAL_FILE)

    if os.path.exists(local_file):
        data = np.load(local_file)

    else:
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading LIGO bigdog data from %s to %s" %
              (DATA_URL, local_file))

        buffer = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.loadtxt(buffer,
                          skiprows=2,
                          dtype=[('t', 'f8'), ('Hanford', 'f8'),
                                 ('Livingston', 'f8'), ('Virgo', 'f8')])
        np.save(local_file, data)

    return data
Example #7
def fetch_nasa_atlas(data_home=None,
                     download_if_missing=True):
    """Loader for NASA galaxy atlas data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        The data, in the form of a numpy record array.

    Notes
    -----
    This is the file created by the example script at
        examples/datasets/truncate_nsa_data.py
    For an explanation of the meaning of the fields, see the description at
        http://www.nsatlas.org/data
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading NASA atlas data from %s to %s"
               % (DATA_URL, data_home))

        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)

        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    return data
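A usage sketch that relies only on the record-array return type documented above; the specific column names are not assumed and are inspected instead:

from astroML.datasets import fetch_nasa_atlas

data = fetch_nasa_atlas()
print(len(data))              # number of galaxies in the truncated atlas
print(data.dtype.names[:10])  # first few available fields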
Example #8
def fetch_nasa_atlas(data_home=None, download_if_missing=True):
    """Loader for NASA galaxy atlas data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        The data, in the form of a numpy record array.

    Notes
    -----
    This is the file created by the example script at
        examples/datasets/truncate_nsa_data.py
    For an explanation of the meaning of the fields, see the description at
        http://www.nsatlas.org/data
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading NASA atlas data from %s to %s" %
              (DATA_URL, data_home))

        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)

        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    return data
Example #9
def fetch_sdss_corrected_spectra(data_home=None,
                                 download_if_missing=True):
    """Loader for Iterative PCA pre-processed galaxy spectra

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : NpzFile
        The data dictionary

    Notes
    -----
    This is the file created by the example script
    examples/datasets/compute_sdss_pca.py
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading PCA-processed SDSS spectra from %s to %s"
               % (DATA_URL, data_home))

        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)

        data_dict = dict([(key, data[key]) for key in data.files])
        np.savez(archive_file, **data_dict)

    else:
        data = np.load(archive_file)

    return data
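A usage sketch for the NpzFile returned above; the key name 'spectra' is an assumption and should be verified against data.files:

from astroML.datasets import fetch_sdss_corrected_spectra

data = fetch_sdss_corrected_spectra()
print(data.files)          # arrays stored in the .npz archive
spectra = data['spectra']  # assumed key; confirm it appears in data.files
print(spectra.shape)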
Example #10
def fetch_sdss_corrected_spectra(data_home=None, download_if_missing=True):
    """Loader for Iterative PCA pre-processed galaxy spectra

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : NpzFile
        The data dictionary

    Notes
    -----
    This is the file created by the example script
    examples/datasets/compute_sdss_pca.py
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading PCA-processed SDSS spectra from %s to %s" %
              (DATA_URL, data_home))

        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)

        data_dict = dict([(key, data[key]) for key in data.files])
        np.savez(archive_file, **data_dict)

    else:
        data = np.load(archive_file)

    return data
Example #11
def fetch_LIGO_large(data_home=None, download_if_missing=True):
    """Loader for LIGO large dataset

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
    dt : float
        data represents ~2000 s of amplitude data from LIGO Hanford;
        dt is the time spacing between measurements in seconds.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    local_file = os.path.join(data_home, LOCAL_FILE_LARGE)

    if os.path.exists(local_file):
        data = np.load(local_file)

    else:
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading LIGO bigdog data from %s to %s" %
              (DATA_URL_LARGE, local_file))

        zipped_buf = download_with_progress_bar(DATA_URL_LARGE,
                                                return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print "uncompressing file..."
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf)
        np.save(local_file, data)

    return data, 1. / 4096
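A usage sketch that turns the (data, dt) return value above into an approximate power spectrum; scipy.signal.welch is used here only as an illustration, and the segment length is arbitrary:

from scipy import signal
from astroML.datasets import fetch_LIGO_large

data, dt = fetch_LIGO_large()
print(len(data) * dt)  # total duration in seconds (~2000 s per the docstring)
freqs, psd = signal.welch(data, fs=1.0 / dt, nperseg=4096)
print(freqs[psd.argmax()])  # frequency of the strongest spectral component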
Example #12
def fetch_LIGO_large(data_home=None, download_if_missing=True):
    """Loader for LIGO large dataset

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
    dt : float
        data represents ~2000 s of amplitude data from LIGO Hanford;
        dt is the time spacing between measurements in seconds.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    local_file = os.path.join(data_home, LOCAL_FILE_LARGE)

    if os.path.exists(local_file):
        data = np.load(local_file)

    else:
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading LIGO bigdog data from %s to %s"
               % (DATA_URL_LARGE, local_file))

        zipped_buf = download_with_progress_bar(DATA_URL_LARGE,
                                                return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print "uncompressing file..."
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf)
        np.save(local_file, data)

    return data, 1. / 4096
Example #13
def fetch_LINEAR_geneva(data_home=None, download_if_missing=True):
    """Loader for LINEAR geneva data.

    This supplements the LINEAR data above with well-determined periods
    and other light curve characteristics.

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : record array
        data on 7000+ LINEAR stars from the Geneva catalog
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, GENEVA_ARCHIVE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        databuffer = download_with_progress_bar(GENEVA_URL)
        data = np.loadtxt(StringIO(databuffer), dtype=ARCHIVE_DTYPE)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
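A usage sketch for the Geneva catalog; the column names depend on ARCHIVE_DTYPE, so they are listed rather than assumed:

from astroML.datasets import fetch_LINEAR_geneva

data = fetch_LINEAR_geneva()
print(len(data))         # 7000+ stars per the docstring
print(data.dtype.names)  # inspect the available catalog columns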
Example #14
def fetch_LINEAR_geneva(data_home=None, download_if_missing=True):
    """Loader for LINEAR geneva data.

    This supplements the LINEAR data above with well-determined periods
    and other light curve characteristics.

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : record array
        data on 7000+ LINEAR stars from the Geneva catalog
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, GENEVA_ARCHIVE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        databuffer = download_with_progress_bar(GENEVA_URL)
        data = np.loadtxt(StringIO(databuffer), dtype=ARCHIVE_DTYPE)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
Example #15
def fetch_rrlyrae_templates(data_home=None, download_if_missing=True):
    """Loader for RR-Lyrae template data

    These are the light-curve templates from Sesar et al 2010, ApJ 708:717

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : numpy record array
        record array containing the templates
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    data = tarfile.open(data_file)

    # key each template by its file name with the '.dat' suffix removed
    return dict([(name[:-4] if name.endswith('.dat') else name,
                  np.loadtxt(data.extractfile(name)))
                 for name in data.getnames()])
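A usage sketch for the template dictionary returned above; the two-column (phase, normalized magnitude) layout of each template is an assumption based on the Sesar et al. 2010 light-curve templates:

from astroML.datasets import fetch_rrlyrae_templates

templates = fetch_rrlyrae_templates()
name = sorted(templates)[0]
phase, mag = templates[name].T  # assumed column order: phase, magnitude
print(name, phase.shape)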
Example #16
def fetch_rrlyrae_templates(data_home=None, download_if_missing=True):
    """Loader for RR-Lyrae template data

    These are the light-curve templates from Sesar et al 2010, ApJ 708:717

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : numpy record array
        record array containing the templates
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    data = tarfile.open(data_file)

    # key each template by its file name with the '.dat' suffix removed
    return dict([(name[:-4] if name.endswith('.dat') else name,
                  np.loadtxt(data.extractfile(name)))
                 for name in data.getnames()])
Example #17
def fetch_sdss_sspp(data_home=None, download_if_missing=True, cleaned=False):
    """Loader for SDSS SEGUE Stellar Parameter Pipeline data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    cleaned : bool (optional) default=False
        if True, then return a cleaned catalog where objects with extreme
        values are removed.

    Returns
    -------
    data : recarray, shape = (327260,)
        record array containing pipeline parameters

    Notes
    -----
    Here are the comments from the fits file header:

    Imaging data and spectrum identifiers for a sample of 327,260
    stars with SDSS spectra,  selected as:

      1) available SSPP parameters in SDSS Data Release 9
         (SSPP rerun 122, file from Y.S. Lee)
      2) 14 < r < 21 (psf magnitudes, uncorrected for ISM extinction)
      3) 10 < u < 25 & 10 < z < 25 (same as above)
      4) errors in ugriz well measured (>0) and <10
      5) 0 < u-g < 3 (all color cuts based on psf mags, dereddened)
      6) -0.5 < g-r < 1.5 & -0.5 < r-i < 1.0 & -0.5 < i-z < 1.0
      7) -200 < pmL < 200 & -200 < pmB < 200 (proper motion in mas/yr)
      8) pmErr < 10 mas/yr (proper motion error)
      9) 1 < log(g) < 5
      10) TeffErr < 300 K

    Teff and TeffErr are given in Kelvin, radVel and radVelErr in km/s.
    (ZI, Feb 2012, [email protected])

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_sspp
    >>> data = fetch_sdss_sspp()
    >>> data.shape  # number of objects in dataset
    (327260,)
    >>> print data.names[:5]  # names of the first five columns
    ['ra', 'dec', 'Ar', 'upsf', 'uErr']
    >>> print data['ra'][:2]  # first two RA values
    [ 49.62750244  40.27209091]
    >>> print data['dec'][:2]  # first two DEC values
    [-1.04175591 -0.64250112]
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError("data not present on disk. " "set download_if_missing=True to download")

        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, "wb").write(fitsdata)

    hdulist = pyfits.open(archive_file)

    data = np.asarray(hdulist[1].data)

    if cleaned:
        # -1.1 < FeH < 0.1
        data = data[(data["FeH"] > -1.1) & (data["FeH"] < 0.1)]

        # -0.03 < alpha/Fe < 0.57
        data = data[(data["alphFe"] > -0.03) & (data["alphFe"] < 0.57)]

        # 5000 < Teff < 6500
        data = data[(data["Teff"] > 5000) & (data["Teff"] < 6500)]

        # 3.5 < log(g) < 5
        data = data[(data["logg"] > 3.5) & (data["logg"] < 5)]

        # 0 < error for FeH < 0.1
        data = data[(data["FeHErr"] > 0) & (data["FeHErr"] < 0.1)]

        # 0 < error for alpha/Fe < 0.05
        data = data[(data["alphFeErr"] > 0) & (data["alphFeErr"] < 0.05)]

        # 15 < g mag < 18
        data = data[(data["gpsf"] > 15) & (data["gpsf"] < 18)]

        # abs(radVel) < 100 km/s
        data = data[(abs(data["radVel"]) < 100)]

    return data
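A usage sketch comparing the full and cleaned catalogs with the same columns the cleaning step above uses ('FeH', 'alphFe'):

from astroML.datasets import fetch_sdss_sspp

data = fetch_sdss_sspp()
data_clean = fetch_sdss_sspp(cleaned=True)
print(len(data), len(data_clean))                         # full vs. cleaned sizes
print(data_clean['FeH'].min(), data_clean['FeH'].max())   # cut to (-1.1, 0.1)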
Example #18
def fetch_sdss_specgals(data_home=None, download_if_missing=True):
    """Loader for SDSS Galaxies with spectral information

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (661598,)
        record array of photometric and spectroscopic galaxy parameters

    Notes
    -----
    These were compiled from the SDSS database using the following SQL query::

        SELECT
          G.ra, G.dec, S.mjd, S.plate, S.fiberID, --- basic identifiers
          --- basic spectral data
          S.z, S.zErr, S.rChi2, S.velDisp, S.velDispErr,
          --- some useful imaging parameters
          G.extinction_r, G.petroMag_r, G.psfMag_r, G.psfMagErr_r,
          G.modelMag_u, modelMagErr_u, G.modelMag_g, modelMagErr_g,
          G.modelMag_r, modelMagErr_r, G.modelMag_i, modelMagErr_i,
          G.modelMag_z, modelMagErr_z, G.petroR50_r, G.petroR90_r,
          --- line fluxes for BPT diagram and other derived spec. parameters
          GSL.nii_6584_flux, GSL.nii_6584_flux_err, GSL.h_alpha_flux,
          GSL.h_alpha_flux_err, GSL.oiii_5007_flux, GSL.oiii_5007_flux_err,
          GSL.h_beta_flux, GSL.h_beta_flux_err, GSL.h_delta_flux,
          GSL.h_delta_flux_err, GSX.d4000, GSX.d4000_err, GSE.bptclass,
          GSE.lgm_tot_p50, GSE.sfr_tot_p50, G.objID, GSI.specObjID
        INTO mydb.SDSSspecgalsDR8 FROM SpecObj S CROSS APPLY
          dbo.fGetNearestObjEQ(S.ra, S.dec, 0.06) N, Galaxy G,
          GalSpecInfo GSI, GalSpecLine GSL, GalSpecIndx GSX, GalSpecExtra GSE
        WHERE N.objID = G.objID
          AND GSI.specObjID = S.specObjID
          AND GSL.specObjID = S.specObjID
          AND GSX.specObjID = S.specObjID
          AND GSE.specObjID = S.specObjID
          --- add some quality cuts to get rid of obviously bad measurements
          AND (G.petroMag_r > 10 AND G.petroMag_r < 18)
          AND (G.modelMag_u-G.modelMag_r) > 0
          AND (G.modelMag_u-G.modelMag_r) < 6
          AND (modelMag_u > 10 AND modelMag_u < 25)
          AND (modelMag_g > 10 AND modelMag_g < 25)
          AND (modelMag_r > 10 AND modelMag_r < 25)
          AND (modelMag_i > 10 AND modelMag_i < 25)
          AND (modelMag_z > 10 AND modelMag_z < 25)
          AND S.rChi2 < 2
          AND (S.zErr > 0 AND S.zErr < 0.01)
          AND S.z > 0.02
          --- end of query ---

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_specgals
    >>> data = fetch_sdss_specgals()
    >>> data.shape  # number of objects in dataset
    (661598,)
    >>> data.names[:5]  # first five column names
    ['ra', 'dec', 'mjd', 'plate', 'fiberID']
    >>> print data['ra'][:3]  # first three RA values
    [ 146.71419105  146.74414186  146.62857334]
    >>> print data['dec'][:3]  #  first three declination values
    [-1.04127639 -0.6522198  -0.7651468 ]
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)
    return np.asarray(hdulist[1].data)
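A usage sketch forming the [NII]/H-alpha line ratio used in BPT classification; the flux column names are taken from the SQL query in the docstring and are assumed to match the FITS column names:

import numpy as np
from astroML.datasets import fetch_sdss_specgals

data = fetch_sdss_specgals()
good = (data['nii_6584_flux'] > 0) & (data['h_alpha_flux'] > 0)
log_nii_ha = np.log10(data['nii_6584_flux'][good] / data['h_alpha_flux'][good])
print(len(data), np.median(log_nii_ha))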
Example #19
def fetch_sdss_specgals(data_home=None, download_if_missing=True):
    """Loader for SDSS Galaxies with spectral information

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (661598,)
        record array of photometric and spectroscopic galaxy parameters

    Notes
    -----
    These were compiled from the SDSS database using the following SQL query::

        SELECT
          G.ra, G.dec, S.mjd, S.plate, S.fiberID, --- basic identifiers
          --- basic spectral data
          S.z, S.zErr, S.rChi2, S.velDisp, S.velDispErr,
          --- some useful imaging parameters
          G.extinction_r, G.petroMag_r, G.psfMag_r, G.psfMagErr_r,
          G.modelMag_u, modelMagErr_u, G.modelMag_g, modelMagErr_g,
          G.modelMag_r, modelMagErr_r, G.modelMag_i, modelMagErr_i,
          G.modelMag_z, modelMagErr_z, G.petroR50_r, G.petroR90_r,
          --- line fluxes for BPT diagram and other derived spec. parameters
          GSL.nii_6584_flux, GSL.nii_6584_flux_err, GSL.h_alpha_flux,
          GSL.h_alpha_flux_err, GSL.oiii_5007_flux, GSL.oiii_5007_flux_err,
          GSL.h_beta_flux, GSL.h_beta_flux_err, GSL.h_delta_flux,
          GSL.h_delta_flux_err, GSX.d4000, GSX.d4000_err, GSE.bptclass,
          GSE.lgm_tot_p50, GSE.sfr_tot_p50, G.objID, GSI.specObjID
        INTO mydb.SDSSspecgalsDR8 FROM SpecObj S CROSS APPLY
          dbo.fGetNearestObjEQ(S.ra, S.dec, 0.06) N, Galaxy G,
          GalSpecInfo GSI, GalSpecLine GSL, GalSpecIndx GSX, GalSpecExtra GSE
        WHERE N.objID = G.objID
          AND GSI.specObjID = S.specObjID
          AND GSL.specObjID = S.specObjID
          AND GSX.specObjID = S.specObjID
          AND GSE.specObjID = S.specObjID
          --- add some quality cuts to get rid of obviously bad measurements
          AND (G.petroMag_r > 10 AND G.petroMag_r < 18)
          AND (G.modelMag_u-G.modelMag_r) > 0
          AND (G.modelMag_u-G.modelMag_r) < 6
          AND (modelMag_u > 10 AND modelMag_u < 25)
          AND (modelMag_g > 10 AND modelMag_g < 25)
          AND (modelMag_r > 10 AND modelMag_r < 25)
          AND (modelMag_i > 10 AND modelMag_i < 25)
          AND (modelMag_z > 10 AND modelMag_z < 25)
          AND S.rChi2 < 2
          AND (S.zErr > 0 AND S.zErr < 0.01)
          AND S.z > 0.02
          --- end of query ---

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_specgals
    >>> data = fetch_sdss_specgals()
    >>> data.shape  # number of objects in dataset
    (661598,)
    >>> data.names[:5]  # first five column names
    ['ra', 'dec', 'mjd', 'plate', 'fiberID']
    >>> print data['ra'][:3]  # first three RA values
    [ 146.71419105  146.74414186  146.62857334]
    >>> print data['dec'][:3]  #  first three declination values
    [-1.04127639 -0.6522198  -0.7651468 ]
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)
    return np.asarray(hdulist[1].data)
Example #20
def fetch_moving_objects(data_home=None, download_if_missing=True,
                         Parker2008_cuts=False):
    """Loader for SDSS moving objects datasets

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Parker2008_cuts : bool (optional)
        If true, apply cuts on magnitudes and orbital parameters used in
        Parker et al. 2008

    Returns
    -------
    data : recarray, shape = (??,)
        record array containing 60 values for each item

    Notes
    -----
    See http://www.astro.washington.edu/users/ivezic/sdssmoc/sdssmoc3.html
    Columns 0, 35, 45, and 56 are left out of the fetch: they are string
    parameters.  Only columns with known orbital parameters are saved.

    Examples
    --------
    >>> data = fetch_moving_objects()
    >>> print len(data)  # number of objects
    104686
    >>> u_g = data['mag_u'] - data['mag_g']
    >>> print u_g[:5]  # first five u-g colors of the dataset
    [ 1.48999977  1.80000114  1.78000069  1.65000153  2.01000023]
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading moving object catalog from %s to %s"
               % (DATA_URL, data_home))

        zipped_buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print "uncompressing file..."
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf, dtype=ADR4_dtype)

        # Select unique sources with known orbital elements
        flag = (data['ast_flag'] == 1) & (data['ast_det_count'] == 1)
        data = data[flag]

        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    if Parker2008_cuts:
        i_z = data['mag_i'] - data['mag_z']

        flag = ((data['aprime'] >= 0.01) & (data['aprime'] <= 100) &
                (data['mag_a'] <= 0.4) & (data['mag_a'] >= -0.3) &
                (i_z <= 0.6) & (i_z >= -0.8))

        data = data[flag]

    return data
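A usage sketch applying the Parker et al. 2008 cuts; the field names ('mag_i', 'mag_z') are the same ones used by the cut logic above:

from astroML.datasets import fetch_moving_objects

data = fetch_moving_objects(Parker2008_cuts=True)
i_z = data['mag_i'] - data['mag_z']
print(len(data))            # objects surviving the cuts
print(i_z.min(), i_z.max()) # should lie within (-0.8, 0.6)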
Example #21
def fetch_dr7_quasar(data_home=None, download_if_missing=True):
    """Loader for SDSS DR7 quasar catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray, shape = (105783,)
        numpy record array containing the quasar catalog

    Examples
    --------
    >>> from astroML.datasets import fetch_dr7_quasar
    >>> data = fetch_dr7_quasar()
    >>> u_g = data['mag_u'] - data['mag_g']
    >>> u_g[:3]  # first three u-g colors
    array([-0.07699966,  0.03600121,  0.10900116], dtype=float32)

    Notes
    -----
    Not all available data is extracted and saved.  The extracted columns are:

    sdssID, RA, DEC, redshift, mag_u, err_u, mag_g, err_g, mag_r, err_r,
    mag_i, err_i, mag_z, err_z, mag_J, err_J, mag_H, err_H, mag_K, err_K,
    specobjid

    many of the objects are missing 2mass photometry.

    More information at
    http://www.sdss.org/dr7/products/value_added/qsocat_dr7.html
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading DR7 quasar dataset from %s to %s"
               % (DATA_URL, data_home))

        zipped_buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf,
                          skiprows=SKIP_ROWS,
                          usecols=COLUMN_NUMBERS,
                          dtype=DR7_DTYPE)
        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    return data
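A usage sketch summarizing the redshift distribution; the 'redshift' column name is taken from the list of extracted columns in the docstring:

import numpy as np
from astroML.datasets import fetch_dr7_quasar

data = fetch_dr7_quasar()
print(len(data))                                     # 105783 quasars
print(np.percentile(data['redshift'], [5, 50, 95]))  # rough redshift range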
Example #22
def fetch_sdss_S82standards(data_home=None, download_if_missing=True,
                            crossmatch_2mass=False):
    """Loader for SDSS stripe82 standard star catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    crossmatch_2mass: bool, optional, default=False
        If True, return the standard star catalog cross-matched with 2mass
        magnitudes

    Returns
    -------
    data : ndarray, shape = (313859,)
        record array containing sdss standard stars (see notes below)

    Notes
    -----
    Information on the data can be found at
    http://www.astro.washington.edu/users/ivezic/sdss/catalogs/stripe82.html
    Data is described in Ivezic et al. 2007 (Astronomical Journal, 134, 973).
    Columns are as follows:

       RA                Right ascension of source (degrees)
       DEC               Declination of source (degrees)
       RArms             rms of right ascension (arcsec)
       DECrms            rms of declination (arcsec)
       Ntot              total number of epochs
       A_r               SFD ISM extinction (mags)

       for each band in (u g r i z):
           Nobs_<band>    number of observations in this band
           mmed_<band>    median magnitude in this band
           mmu_<band>     mean magnitude in this band
           msig_<band>    standard error on the mean
                          (1.25 times larger for median)
           mrms_<band>    root-mean-square scatter
           mchi2_<band>   chi2 per degree of freedom for mean magnitude

    For 2-MASS, the following columns are added:

       ra2MASS           2MASS right ascension
       dec2MASS          2MASS declination
       J                 J-band magnitude
       Jerr              J-band error
       H                 H-band magnitude
       Herr              H-band error
       K                 K-band magnitude
       Kerr              K-band error
       theta             difference between SDSS and 2MASS position (arcsec)

    Examples
    --------
    >>> data = fetch_sdss_S82standards()
    >>> u_g = data['mmed_u'] - data['mmed_g']
    >>> print u_g[:5]
    [-22.23500061   1.34900093   1.43799973   2.08200073 -23.03800011]


    References
    ----------
    Ivezic et al. AJ 134:973 (2007)
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    if crossmatch_2mass:
        archive_file = os.path.join(data_home, ARCHIVE_FILE_2MASS)
        data_url = DATA_URL_2MASS
        kwargs = dict(dtype=DTYPE_2MASS)

    else:
        archive_file = os.path.join(data_home, ARCHIVE_FILE)
        data_url = DATA_URL
        kwargs = dict(usecols=COLUMNS, dtype=DTYPE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print ("downloading cross-matched SDSS/2MASS dataset from %s to %s"
               % (data_url, data_home))

        zipped_buf = download_with_progress_bar(data_url, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print "uncompressing file..."
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf, **kwargs)
        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    return data
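A usage sketch for the 2MASS cross-matched catalog; the 'mmed_g' and 'K' column names are taken from the docstring above, though the exact columns of the cross-matched file depend on DTYPE_2MASS:

from astroML.datasets import fetch_sdss_S82standards

data = fetch_sdss_S82standards(crossmatch_2mass=True)
g_K = data['mmed_g'] - data['K']  # optical-infrared color
print(len(data), g_K[:5])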
Example #23
def fetch_sdss_sspp(data_home=None, download_if_missing=True, cleaned=False):
    """Loader for SDSS SEGUE Stellar Parameter Pipeline data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    cleaned : bool (optional) default=False
        if True, then return a cleaned catalog where objects with extreme
        values are removed.

    Returns
    -------
    data : recarray, shape = (327260,)
        record array containing pipeline parameters

    Notes
    -----
    Here are the comments from the fits file header:

    Imaging data and spectrum identifiers for a sample of 327,260
    stars with SDSS spectra,  selected as:

      1) available SSPP parameters in SDSS Data Release 9
         (SSPP rerun 122, file from Y.S. Lee)
      2) 14 < r < 21 (psf magnitudes, uncorrected for ISM extinction)
      3) 10 < u < 25 & 10 < z < 25 (same as above)
      4) errors in ugriz well measured (>0) and <10
      5) 0 < u-g < 3 (all color cuts based on psf mags, dereddened)
      6) -0.5 < g-r < 1.5 & -0.5 < r-i < 1.0 & -0.5 < i-z < 1.0
      7) -200 < pmL < 200 & -200 < pmB < 200 (proper motion in mas/yr)
      8) pmErr < 10 mas/yr (proper motion error)
      9) 1 < log(g) < 5
      10) TeffErr < 300 K

    Teff and TeffErr are given in Kelvin, radVel and radVelErr in km/s.
    (ZI, Feb 2012, [email protected])

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_sspp
    >>> data = fetch_sdss_sspp()
    >>> data.shape  # number of objects in dataset
    (327260,)
    >>> print data.names[:5]  # names of the first five columns
    ['ra', 'dec', 'Ar', 'upsf', 'uErr']
    >>> print data['ra'][:2]  # first two RA values
    [ 49.62750244  40.27209091]
    >>> print data['dec'][:2]  # first two DEC values
    [-1.04175591 -0.64250112]
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)

    data = np.asarray(hdulist[1].data)

    if cleaned:
        # -1.1 < FeH < 0.1
        data = data[(data['FeH'] > -1.1) & (data['FeH'] < 0.1)]

        # -0.03 < alpha/Fe < 0.57
        data = data[(data['alphFe'] > -0.03) & (data['alphFe'] < 0.57)]

        # 5000 < Teff < 6500
        data = data[(data['Teff'] > 5000) & (data['Teff'] < 6500)]

        # 3.5 < log(g) < 5
        data = data[(data['logg'] > 3.5) & (data['logg'] < 5)]

        # 0 < error for FeH < 0.1
        data = data[(data['FeHErr'] > 0) & (data['FeHErr'] < 0.1)]

        # 0 < error for alpha/Fe < 0.05
        data = data[(data['alphFeErr'] > 0) & (data['alphFeErr'] < 0.05)]

        # 15 < g mag < 18
        data = data[(data['gpsf'] > 15) & (data['gpsf'] < 18)]

        # abs(radVel) < 100 km/s
        data = data[(abs(data['radVel']) < 100)]

    return data
Example #24
def fetch_imaging_sample(data_home=None, download_if_missing=True):
    """Loader for SDSS Imaging sample data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (330753,)
        record array containing imaging data

    Examples
    --------
    >>> from astroML.datasets import fetch_imaging_sample
    >>> data = fetch_imaging_sample()
    >>> data.shape  # number of objects in dataset
    (330753,)
    >>> print data.names[:5]  # names of the first five columns
    ['ra', 'dec', 'run', 'rExtSFD', 'uRaw']
    >>> print data['ra'][:2]
    [ 0.265165  0.265413]
    >>> print data['dec'][:2]
    [-0.444861 -0.62201 ]

    Notes
    -----
    This data was selected from the SDSS database using the following SQL
    query::

        SELECT
          round(p.ra,6) as ra, round(p.dec,6) as dec,
          p.run,                              --- comments are preceded by ---
          round(p.extinction_r,3) as rExtSFD, --- r band extinction from SFD
          round(p.modelMag_u,3) as uRaw,      --- ISM-uncorrected model mags
          round(p.modelMag_g,3) as gRaw,      --- rounding up model magnitudes
          round(p.modelMag_r,3) as rRaw,
          round(p.modelMag_i,3) as iRaw,
          round(p.modelMag_z,3) as zRaw,
          round(p.modelMagErr_u,3) as uErr,   --- errors are important!
          round(p.modelMagErr_g,3) as gErr,
          round(p.modelMagErr_r,3) as rErr,
          round(p.modelMagErr_i,3) as iErr,
          round(p.modelMagErr_z,3) as zErr,
          round(p.psfMag_u,3) as psfRaw,      --- psf magnitudes
          round(p.psfMag_g,3) as psfRaw,
          round(p.psfMag_r,3) as psfRaw,
          round(p.psfMag_i,3) as psfRaw,
          round(p.psfMag_z,3) as psfRaw,
          round(p.psfMagErr_u,3) as psfuErr,
          round(p.psfMagErr_g,3) as psfgErr,
          round(p.psfMagErr_r,3) as psfrErr,
          round(p.psfMagErr_i,3) as psfiErr,
          round(p.psfMagErr_z,3) as psfzErr,
          p.type,                   --- tells if a source is resolved or not
          (case when (p.flags & '16') = 0 then 1 else 0 end) as ISOLATED
        INTO mydb.SDSSimagingSample
        FROM PhotoTag p
        WHERE
            --- 10x2 sq.deg.
          p.ra > 0.0 and p.ra < 10.0 and p.dec > -1 and p.dec < 1
            --- resolved and unresolved sources
          and (p.type = 3 OR p.type = 6) and
            --- '4295229440' is magic code for no
            --- DEBLENDED_AS_MOVING or SATURATED objects
          (p.flags & '4295229440') = 0 and
            --- PRIMARY objects only, which implies
            --- !BRIGHT && (!BLENDED || NODEBLEND || nchild == 0)]
          p.mode = 1 and
            --- adopted faint limit (same as about SDSS limit)
          p.modelMag_r < 22.5
        --- the end of query
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)
    return np.asarray(hdulist[1].data)
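A usage sketch using the raw model-magnitude columns named in the SQL query above ('gRaw', 'rRaw'):

from astroML.datasets import fetch_imaging_sample

data = fetch_imaging_sample()
g_r = data['gRaw'] - data['rRaw']  # ISM-uncorrected model color
print(len(data), g_r[:3])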
Example #25
def fetch_sdss_S82standards(data_home=None,
                            download_if_missing=True,
                            crossmatch_2mass=False):
    """Loader for SDSS stripe82 standard star catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    crossmatch_2mass: bool, optional, default=False
        If True, return the standard star catalog cross-matched with 2mass
        magnitudes

    Returns
    -------
    data : ndarray, shape = (313859,)
        record array containing sdss standard stars (see notes below)

    Notes
    -----
    Information on the data can be found at
    http://www.astro.washington.edu/users/ivezic/sdss/catalogs/stripe82.html
    Data is described in Ivezic et al. 2007 (Astronomical Journal, 134, 973).
    Columns are as follows:

       RA                Right ascension of source (degrees)
       DEC               Declination of source (degrees)
       RArms             rms of right ascension (arcsec)
       DECrms            rms of declination (arcsec)
       Ntot              total number of epochs
       A_r               SFD ISM extinction (mags)

       for each band in (u g r i z):
           Nobs_<band>    number of observations in this band
           mmed_<band>    median magnitude in this band
           mmu_<band>     mean magnitude in this band
           msig_<band>    standard error on the mean
                          (1.25 times larger for median)
           mrms_<band>    root-mean-square scatter
           mchi2_<band>   chi2 per degree of freedom for mean magnitude

    For 2-MASS, the following columns are added:

       ra2MASS           2MASS right ascension
       dec2MASS          2MASS declination
       J                 J-band magnitude
       Jerr              J-band error
       H                 H-band magnitude
       Herr              H-band error
       K                 K-band magnitude
       Kerr              K-band error
       theta             difference between SDSS and 2MASS position (arcsec)

    Examples
    --------
    >>> data = fetch_sdss_S82standards()
    >>> u_g = data['mmed_u'] - data['mmed_g']
    >>> print u_g[:5]
    [-22.23500061   1.34900093   1.43799973   2.08200073 -23.03800011]

    References
    ----------
    Ivezic et al. AJ 134:973 (2007)
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    if crossmatch_2mass:
        archive_file = os.path.join(data_home, ARCHIVE_FILE_2MASS)
        data_url = DATA_URL_2MASS
        kwargs = dict(dtype=DTYPE_2MASS)

    else:
        archive_file = os.path.join(data_home, ARCHIVE_FILE)
        data_url = DATA_URL
        kwargs = dict(usecols=COLUMNS, dtype=DTYPE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading cross-matched SDSS/2MASS dataset from %s to %s" %
              (data_url, data_home))

        zipped_buf = download_with_progress_bar(data_url, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print "uncompressing file..."
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf, **kwargs)
        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    return data
Example #26
def fetch_moving_objects(data_home=None,
                         download_if_missing=True,
                         Parker2008_cuts=False):
    """Loader for SDSS moving objects datasets

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Parker2008_cuts : bool (optional)
        If true, apply cuts on magnitudes and orbital parameters used in
        Parker et al. 2008

    Returns
    -------
    data : recarray, shape = (??,)
        record array containing 60 values for each item

    Notes
    -----
    See http://www.astro.washington.edu/users/ivezic/sdssmoc/sdssmoc3.html
    Columns 0, 35, 45, and 56 are left out of the fetch: they are string
    parameters.  Only columns with known orbital parameters are saved.

    Examples
    --------
    >>> data = fetch_moving_objects()
    >>> print len(data)  # number of objects
    104686
    >>> u_g = data['mag_u'] - data['mag_g']
    >>> print u_g[:5]  # first five u-g colors of the dataset
    [ 1.48999977  1.80000114  1.78000069  1.65000153  2.01000023]
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading moving object catalog from %s to %s" %
              (DATA_URL, data_home))

        zipped_buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print "uncompressing file..."
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf, dtype=ADR4_dtype)

        # Select unique sources with known orbital elements
        flag = (data['ast_flag'] == 1) & (data['ast_det_count'] == 1)
        data = data[flag]

        np.save(archive_file, data)

    else:
        data = np.load(archive_file)

    if Parker2008_cuts:
        i_z = data['mag_i'] - data['mag_z']

        flag = ((data['aprime'] >= 0.01) & (data['aprime'] <= 100) &
                (data['mag_a'] <= 0.4) & (data['mag_a'] >= -0.3) & (i_z <= 0.6)
                & (i_z >= -0.8))

        data = data[flag]

    return data