Beispiel #1
0
def fetch_sdss_filter(fname, data_home=None, download_if_missing=True):
    """Loader for SDSS Filter profiles

    Parameters
    ----------
    fname : str
        filter name: must be exactly one of 'u', 'g', 'r', 'i', 'z'
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        data is an array of shape (5, Nlam)
        first row: wavelength in angstroms
        second row: sensitivity to point source, airmass 1.3
        third row: sensitivity to extended source, airmass 1.3
        fourth row: sensitivity to extended source, airmass 0.0
        fifth row: assumed atmospheric extinction, airmass 1.0

    Raises
    ------
    ValueError
        If ``fname`` is not one of the five single-letter filter names.
    IOError
        If the file is not cached and ``download_if_missing`` is False.
    """
    # Compare against the individual letters: a substring test against
    # 'ugriz' would also accept '', 'ug', 'gri', 'ugriz', ...
    if fname not in ('u', 'g', 'r', 'i', 'z'):
        raise ValueError("Unrecognized filter name '%s'" % (fname,))
    url = FILTER_URL % fname

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, '%s.dat' % fname)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        # Parenthesised single-argument form is valid on Python 2 and 3.
        print("downloading from %s" % url)
        response = urllib2.urlopen(url)
        try:
            payload = response.read()
        finally:
            response.close()

        # The payload is written only after the download completed, in
        # binary mode so the bytes land on disk unmodified.
        with open(archive_file, 'wb') as cache:
            cache.write(payload)

    with open(archive_file) as cached:
        return np.loadtxt(cached, unpack=True)
Beispiel #2
0
def fetch_sdss_filter(fname, data_home=None, download_if_missing=True):
    """Loader for SDSS Filter profiles

    Parameters
    ----------
    fname : str
        filter name: must be exactly one of 'u', 'g', 'r', 'i', 'z'
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        data is an array of shape (5, Nlam)
        first row: wavelength in angstroms
        second row: sensitivity to point source, airmass 1.3
        third row: sensitivity to extended source, airmass 1.3
        fourth row: sensitivity to extended source, airmass 0.0
        fifth row: assumed atmospheric extinction, airmass 1.0

    Raises
    ------
    ValueError
        If ``fname`` is not one of the five single-letter filter names.
    IOError
        If the file is not cached and ``download_if_missing`` is False.
    """
    # Compare against the individual letters: a substring test against
    # 'ugriz' would also accept '', 'ug', 'gri', 'ugriz', ...
    if fname not in ('u', 'g', 'r', 'i', 'z'):
        raise ValueError("Unrecognized filter name '%s'" % (fname,))
    url = FILTER_URL % fname

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, '%s.dat' % fname)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading from %s" % url)
        response = urlopen(url)
        try:
            payload = response.read()
        finally:
            response.close()

        # The payload is written only after the download completed, so a
        # failed read does not leave an empty cache file behind.
        with open(archive_file, 'wb') as cache:
            cache.write(payload)

    with open(archive_file) as cached:
        return np.loadtxt(cached, unpack=True)
Beispiel #3
0
def fetch_rrlyrae_mags(data_home=None, download_if_missing=True):
    """Loader for RR-Lyrae data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (483,)
        record array containing imaging data

    Raises
    ------
    IOError
        If the file is not cached and ``download_if_missing`` is False.

    Examples
    --------
    >>> from astroML.datasets import fetch_rrlyrae_mags
    >>> data = fetch_rrlyrae_mags()  # doctest: +IGNORE_OUTPUT
    >>> data.shape  # number of objects in dataset
    (483,)

    Notes
    -----
    This data is from table 1 of Sesar et al 2010 ApJ 708:717
    """
    # fits is an optional dependency: don't import globally
    from astropy.io import fits

    data_home = get_data_home(data_home)

    # The cache file is named after the last component of the download URL.
    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        fitsdata = download_with_progress_bar(DATA_URL)
        # Close the cache file explicitly so it is fully flushed before
        # fits.open() reads it back below.
        with open(archive_file, 'wb') as cache:
            cache.write(fitsdata)

    # NOTE(review): the HDU list is intentionally not closed here; the
    # returned array may be backed by the open file (memory mapping) --
    # confirm before adding a close().
    hdulist = fits.open(archive_file)
    return np.asarray(hdulist[1].data)
Beispiel #4
0
def fetch_vega_spectrum(data_home=None, download_if_missing=True):
    """Loader for Vega reference spectrum

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        data[0] is the array of wavelength in angstroms
        data[1] is the array of fluxes in Jy (F_nu, not F_lambda)

    Raises
    ------
    IOError
        If the file is not cached and ``download_if_missing`` is False.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    # The cache file is named after the last path component of the URL.
    archive_name = os.path.join(data_home, VEGA_URL.split('/')[-1])

    if not os.path.exists(archive_name):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        # Parenthesised single-argument form is valid on Python 2 and 3.
        print("downloading from %s" % VEGA_URL)
        response = urllib2.urlopen(VEGA_URL)
        try:
            payload = response.read()
        finally:
            response.close()

        # The payload is written only after the download completed, in
        # binary mode so the bytes land on disk unmodified.
        with open(archive_name, 'wb') as cache:
            cache.write(payload)

    with open(archive_name) as cached:
        return np.loadtxt(cached, unpack=True)
Beispiel #5
0
def fetch_vega_spectrum(data_home=None, download_if_missing=True):
    """Loader for Vega reference spectrum

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        data[0] is the array of wavelength in angstroms
        data[1] is the array of fluxes in Jy (F_nu, not F_lambda)

    Raises
    ------
    IOError
        If the file is not cached and ``download_if_missing`` is False.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    # The cache file is named after the last path component of the URL.
    archive_name = os.path.join(data_home, VEGA_URL.split('/')[-1])

    if not os.path.exists(archive_name):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading from %s" % VEGA_URL)
        response = urlopen(VEGA_URL)
        try:
            payload = response.read()
        finally:
            response.close()

        # The payload is written only after the download completed, so a
        # failed read does not leave an empty cache file behind.
        with open(archive_name, 'wb') as cache:
            cache.write(payload)

    with open(archive_name, 'r') as cached:
        return np.loadtxt(cached, unpack=True)
def fetch_sdss_S82standards(data_home=None,
                            download_if_missing=True,
                            crossmatch_2mass=False):
    """Loader for SDSS stripe82 standard star catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    crossmatch_2mass: bool, optional, default=False
        If True, return the standard star catalog cross-matched with 2mass
        magnitudes

    Returns
    -------
    data : ndarray, shape = (313859,)
        record array containing sdss standard stars (see notes below)

    Raises
    ------
    IOError
        If the file is not cached and ``download_if_missing`` is False.

    Notes
    -----
    Information on the data can be found at
    http://www.astro.washington.edu/users/ivezic/sdss/catalogs/stripe82.html
    Data is described in Ivezic et al. 2007 (Astronomical Journal, 134, 973).
    Columns are as follows:

       RA                Right ascension of source (degrees)
       DEC               Declination of source (degrees)
       RArms             rms of right ascension (arcsec)
       DECrms            rms of declination (arcsec)
       Ntot              total number of epochs
       A_r               SFD ISM extinction (mags)

       for each band in (u g r i z):
           Nobs_<band>    number of observations in this band
           mmed_<band>    median magnitude in this band
           mmu_<band>     mean magnitude in this band
           msig_<band>    standard error on the mean
                          (1.25 times larger for median)
           mrms_<band>    root-mean-square scatter
           mchi2_<band>   chi2 per degree of freedom for mean magnitude

    For 2-MASS, the following columns are added:

       ra2MASS           2-mass right ascension
       dec2MASS          2-mass declination
       J                 J-band magnitude
       Jerr              J-band error
       H                 H-band magnitude
       Herr              H-band error
       K                 K-band magnitude
       Kerr              K-band error
       theta             difference between SDSS and 2MASS position (arcsec)

    Examples
    --------
    >>> data = fetch_sdss_S82standards()  # doctest: +IGNORE_OUTPUT
    >>> u_g = data['mmed_u'] - data['mmed_g']
    >>> print(u_g[:4])
    [-22.23500061   1.34900093   1.43799973   2.08200073]

    References
    ----------
    Ivezic et al. AJ 134:973 (2007)
    """
    data_home = get_data_home(data_home)

    # Select cache file, source URL and loadtxt options for the requested
    # catalog flavour; `description` only feeds the progress message.
    if crossmatch_2mass:
        archive_file = os.path.join(data_home, ARCHIVE_FILE_2MASS)
        data_url = DATA_URL_2MASS
        kwargs = dict(dtype=DTYPE_2MASS)
        description = "cross-matched SDSS/2MASS dataset"
    else:
        archive_file = os.path.join(data_home, ARCHIVE_FILE)
        data_url = DATA_URL
        kwargs = dict(usecols=COLUMNS, dtype=DTYPE)
        description = "SDSS stripe82 standard star dataset"

    # Fast path: the parsed catalog is already cached as a numpy file.
    if os.path.exists(archive_file):
        return np.load(archive_file)

    if not download_if_missing:
        raise IOError('data not present on disk. '
                      'set download_if_missing=True to download')

    print("downloading %s from %s to %s" %
          (description, data_url, data_home))

    zipped_buf = download_with_progress_bar(data_url, return_buffer=True)
    print("uncompressing file...")
    with GzipFile(fileobj=zipped_buf, mode='rb') as gzf:
        extracted_buf = BytesIO(gzf.read())

    data = np.loadtxt(extracted_buf, **kwargs)

    # NOTE(review): np.save appends '.npy' when the file name lacks that
    # suffix, in which case the exists() check above would never find the
    # cache -- confirm ARCHIVE_FILE / ARCHIVE_FILE_2MASS end in '.npy'.
    np.save(archive_file, data)

    return data
Beispiel #7
0
def fetch_sdss_sspp(data_home=None, download_if_missing=True, cleaned=False):
    """Loader for SDSS SEGUE Stellar Parameter Pipeline data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : bool (optional) default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    cleaned : bool (optional) default=False
        if True, then return a cleaned catalog where objects with extreme
        values are removed.

    Returns
    -------
    data : recarray, shape = (327260,)
        record array containing pipeline parameters

    Raises
    ------
    IOError
        If the file is not cached and ``download_if_missing`` is False.

    Notes
    -----
    Here are the comments from the fits file header:

    Imaging data and spectrum identifiers for a sample of 327,260
    stars with SDSS spectra,  selected as:

      1) available SSPP parameters in SDSS Data Release 9
         (SSPP rerun 122, file from Y.S. Lee)
      2) 14 < r < 21 (psf magnitudes, uncorrected for ISM extinction)
      3) 10 < u < 25 & 10 < z < 25 (same as above)
      4) errors in ugriz well measured (>0) and <10
      5) 0 < u-g < 3 (all color cuts based on psf mags, dereddened)
      6) -0.5 < g-r < 1.5 & -0.5 < r-i < 1.0 & -0.5 < i-z < 1.0
      7) -200 < pmL < 200 & -200 < pmB < 200 (proper motion in mas/yr)
      8) pmErr < 10 mas/yr (proper motion error)
      9) 1 < log(g) < 5
      10) TeffErr < 300 K

    Teff and TeffErr are given in Kelvin, radVel and radVelErr in km/s.
    (ZI, Feb 2012, [email protected])

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_sspp
    >>> data = fetch_sdss_sspp()  # doctest: +IGNORE_OUTPUT
    >>> data.shape  # number of objects in dataset
    (327260,)
    >>> print(data.dtype.names[:5])  # names of the first five columns
    ('ra', 'dec', 'Ar', 'upsf', 'uErr')
    >>> print(data['ra'][:1])  # first RA value
    [49.6275024]
    >>> print(data['dec'][:1])  # first DEC value
    [-1.04175591]
    """
    # fits is an optional dependency: don't import globally
    from astropy.io import fits

    data_home = get_data_home(data_home)

    # The cache file is named after the last component of the download URL.
    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        fitsdata = download_with_progress_bar(DATA_URL)
        # Close the cache file explicitly so it is fully flushed before
        # fits.open() reads it back below.
        with open(archive_file, 'wb') as cache:
            cache.write(fitsdata)

    # NOTE(review): the HDU list is intentionally not closed here; the
    # array may be backed by the open file (memory mapping) -- confirm
    # before adding a close().
    hdulist = fits.open(archive_file)

    data = np.asarray(hdulist[1].data)

    if cleaned:
        # Open intervals (lower < value < upper) each column must satisfy.
        bounds = (
            ('FeH', -1.1, 0.1),        # -1.1 < FeH < 0.1
            ('alphFe', -0.03, 0.57),   # -0.03 < alpha/Fe < 0.57
            ('Teff', 5000, 6500),      # 5000 < Teff < 6500
            ('logg', 3.5, 5),          # 3.5 < log(g) < 5
            ('FeHErr', 0, 0.1),        # 0 < error for FeH < 0.1
            ('alphFeErr', 0, 0.05),    # 0 < error for alpha/Fe < 0.05
            ('gpsf', 15, 18),          # 15 < g mag < 18
        )

        # abs(radVel) < 100 km/s
        keep = np.abs(data['radVel']) < 100

        # Accumulate one mask and index once, rather than copying the
        # whole record array after every individual cut.
        for column, lower, upper in bounds:
            values = data[column]
            keep &= (values > lower) & (values < upper)

        data = data[keep]

    return data