def fetch_wmap_temperatures(masked=False, data_home=None,
                            download_if_missing=True):
    """Loader for WMAP temperature map data

    Parameters
    ----------
    masked : optional, default=False
        If True, then return the foreground-masked healpix array of data
        If False, then return the raw temperature array
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : np.ndarray or np.ma.MaskedArray
        array containing (masked) temperature data
    """
    import healpy as hp

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))
    mask_file = os.path.join(data_home, os.path.basename(MASK_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        data_buffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(data_buffer)

    data = hp.read_map(data_file)

    if masked:
        if not os.path.exists(mask_file):
            if not download_if_missing:
                raise IOError('mask data not present on disk. '
                              'set download_if_missing=True to download')
            mask_buffer = download_with_progress_bar(MASK_URL)
            # the mask is binary FITS data: write it in binary mode
            open(mask_file, 'wb').write(mask_buffer)

        mask = hp.read_map(mask_file)

        data = hp.ma(data)
        data.mask = np.logical_not(mask)  # WMAP mask has 0=bad. We need 1=bad

    return data
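

# A minimal usage sketch (illustration only, not part of the original module):
# fetch the foreground-masked map and report its sky coverage.  Assumes healpy
# is installed and the FITS files are downloadable or already cached.
def _example_wmap_temperatures():
    wmap_masked = fetch_wmap_temperatures(masked=True)
    n_good = np.sum(~wmap_masked.mask)
    print("unmasked pixels: %i of %i" % (n_good, wmap_masked.size))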


def fetch_LIGO_bigdog(data_home=None, download_if_missing=True):
    """Loader for LIGO bigdog event

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : record array
        The data is 10 seconds of measurements from three sites, along with
        the time of each measurement.

    Examples
    --------
    >>> from astroML.datasets import fetch_LIGO_bigdog
    >>> data = fetch_LIGO_bigdog()
    >>> print(data.dtype.names)
    ('t', 'Hanford', 'Livingston', 'Virgo')
    >>> print(data['t'][:3])
    [  0.00000000e+00   6.10400000e-05   1.22070000e-04]
    >>> print(data['Hanford'][:3])
    [  1.26329846e-17   1.26846778e-17   1.19187381e-17]
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    local_file = os.path.join(data_home, LOCAL_FILE)

    if os.path.exists(local_file):
        data = np.load(local_file)
    else:
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading LIGO bigdog data from %s to %s"
              % (DATA_URL, local_file))
        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.loadtxt(buf, skiprows=2,
                          dtype=[('t', 'f8'),
                                 ('Hanford', 'f8'),
                                 ('Livingston', 'f8'),
                                 ('Virgo', 'f8')])
        np.save(local_file, data)

    return data
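

# A short usage sketch (illustration only): load the event and report the
# sampling interval and duration, using the documented 't' column.
def _example_LIGO_bigdog():
    data = fetch_LIGO_bigdog()
    dt = data['t'][1] - data['t'][0]
    print("dt = %.3g s, duration = %.1f s" % (dt, data['t'][-1]))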


def fetch_LINEAR_sample(data_home=None, download_if_missing=True):
    """Loader for LINEAR data sample

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : LINEARdata object
        A custom object which provides access to 7010 selected LINEAR
        light curves.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    targetlist_file = os.path.join(data_home,
                                   os.path.basename(TARGETLIST_URL))
    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(targetlist_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        targets = download_with_progress_bar(TARGETLIST_URL)
        open(targetlist_file, 'wb').write(targets)

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    return LINEARdata(data_file, targetlist_file)
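

# A short usage sketch (illustration only).  It assumes the returned
# LINEARdata object exposes an ``ids`` list and item access returning an
# (N, 3) array of (time, magnitude, magnitude error) rows, as in the
# astroML example gallery; treat both as assumptions, not documented API.
def _example_LINEAR_sample():
    data = fetch_LINEAR_sample()
    t, mag, dmag = data[data.ids[0]].T
    print("first light curve: %i epochs" % len(t))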


def fetch_nasa_atlas(data_home=None, download_if_missing=True):
    """Loader for NASA galaxy atlas data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
        The data, in the form of a numpy record array.

    Notes
    -----
    This is the file created by the example script at
    examples/datasets/truncate_nsa_data.py
    For an explanation of the meaning of the fields, see the description at
    http://www.nsatlas.org/data
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading NASA atlas data from %s to %s"
              % (DATA_URL, data_home))
        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
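

# A short usage sketch (illustration only): load the atlas and list the
# first few field names of the record array.
def _example_nasa_atlas():
    data = fetch_nasa_atlas()
    print("%i galaxies" % data.size)
    print(data.dtype.names[:5])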


def fetch_sdss_corrected_spectra(data_home=None, download_if_missing=True):
    """Loader for Iterative PCA pre-processed galaxy spectra

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : NpzFile
        The data dictionary

    Notes
    -----
    This is the file created by the example script
    examples/datasets/compute_sdss_pca.py
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading PCA-processed SDSS spectra from %s to %s"
              % (DATA_URL, data_home))
        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)

        data_dict = dict((key, data[key]) for key in data.files)
        np.savez(archive_file, **data_dict)
    else:
        data = np.load(archive_file)

    return data
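

# A short usage sketch (illustration only).  The returned NpzFile behaves
# like a dict of arrays; the 'spectra' key (an assumption based on the
# astroML example gallery, not verified here) holds one spectrum per row.
def _example_sdss_corrected_spectra():
    data = fetch_sdss_corrected_spectra()
    print(sorted(data.files))
    print("spectra array shape:", data['spectra'].shape)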


def fetch_LIGO_large(data_home=None, download_if_missing=True):
    """Loader for LIGO large dataset

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray
    dt : float
        data represents ~2000s of amplitude data from LIGO Hanford;
        dt is the time spacing between measurements in seconds.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    local_file = os.path.join(data_home, LOCAL_FILE_LARGE)

    if os.path.exists(local_file):
        data = np.load(local_file)
    else:
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading LIGO large dataset from %s to %s"
              % (DATA_URL_LARGE, local_file))
        zipped_buf = download_with_progress_bar(DATA_URL_LARGE,
                                                return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print("uncompressing file...")
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf)
        np.save(local_file, data)

    return data, 1. / 4096
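

# A short usage sketch (illustration only): the loader returns the strain
# array together with the documented sampling interval dt = 1/4096 s.
def _example_LIGO_large():
    data, dt = fetch_LIGO_large()
    print("%i samples, %.1f seconds of data" % (data.size, data.size * dt))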


def fetch_LINEAR_geneva(data_home=None, download_if_missing=True):
    """Loader for LINEAR geneva data.

    This supplements the LINEAR data above with well-determined periods
    and other light curve characteristics.

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : record array
        data on 7000+ LINEAR stars from the Geneva catalog
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, GENEVA_ARCHIVE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        databuffer = download_with_progress_bar(GENEVA_URL)
        data = np.loadtxt(StringIO(databuffer), dtype=ARCHIVE_DTYPE)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
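

# A short usage sketch (illustration only): load the Geneva catalog and
# report its size and the first few column names.
def _example_LINEAR_geneva():
    data = fetch_LINEAR_geneva()
    print("%i stars" % data.size)
    print(data.dtype.names[:5])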


def fetch_rrlyrae_templates(data_home=None, download_if_missing=True):
    """Loader for RR-Lyrae template data

    These are the light-curve templates from Sesar et al 2010, ApJ 708:717

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : dict
        dictionary of np.ndarray templates, keyed by the template file name
        with its '.dat' extension removed
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    data = tarfile.open(data_file)

    # use os.path.splitext rather than str.strip to drop the '.dat'
    # extension: strip('.dat') removes any leading/trailing '.', 'd',
    # 'a', or 't' characters, corrupting names that end in those letters.
    return dict((os.path.splitext(name)[0],
                 np.loadtxt(data.extractfile(name)))
                for name in data.getnames())
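

# A short usage sketch (illustration only).  It assumes each template file
# is a two-column (phase, normalized magnitude) table, as in Sesar et al
# 2010; the key format (file name minus '.dat') follows the loader above.
def _example_rrlyrae_templates():
    templates = fetch_rrlyrae_templates()
    name = sorted(templates)[0]
    phase, mag = templates[name].T
    print("template %s: %i points" % (name, len(phase)))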


def fetch_sdss_sspp(data_home=None, download_if_missing=True, cleaned=False):
    """Loader for SDSS SEGUE Stellar Parameter Pipeline data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : bool (optional) default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    cleaned : bool (optional) default=False
        if True, then return a cleaned catalog where objects with extreme
        values are removed.

    Returns
    -------
    data : recarray, shape = (327260,)
        record array containing pipeline parameters

    Notes
    -----
    Here are the comments from the fits file header:

    Imaging data and spectrum identifiers for a sample of 327,260
    stars with SDSS spectra, selected as:

      1) available SSPP parameters in SDSS Data Release 9
         (SSPP rerun 122, file from Y.S. Lee)
      2) 14 < r < 21 (psf magnitudes, uncorrected for ISM extinction)
      3) 10 < u < 25 & 10 < z < 25 (same as above)
      4) errors in ugriz well measured (>0) and <10
      5) 0 < u-g < 3 (all color cuts based on psf mags, dereddened)
      6) -0.5 < g-r < 1.5 & -0.5 < r-i < 1.0 & -0.5 < i-z < 1.0
      7) -200 < pmL < 200 & -200 < pmB < 200 (proper motion in mas/yr)
      8) pmErr < 10 mas/yr (proper motion error)
      9) 1 < log(g) < 5
      10) TeffErr < 300 K

    Teff and TeffErr are given in Kelvin, radVel and radVelErr in km/s.
    (ZI, Feb 2012, [email protected])

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_sspp
    >>> data = fetch_sdss_sspp()
    >>> data.shape  # number of objects in dataset
    (327260,)
    >>> print(data.names[:5])  # names of the first five columns
    ['ra', 'dec', 'Ar', 'upsf', 'uErr']
    >>> print(data['ra'][:2])  # first two RA values
    [ 49.62750244  40.27209091]
    >>> print(data['dec'][:2])  # first two DEC values
    [-1.04175591 -0.64250112]
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)
    data = np.asarray(hdulist[1].data)

    if cleaned:
        # -1.1 < FeH < 0.1
        data = data[(data['FeH'] > -1.1) & (data['FeH'] < 0.1)]

        # -0.03 < alpha/Fe < 0.57
        data = data[(data['alphFe'] > -0.03) & (data['alphFe'] < 0.57)]

        # 5000 < Teff < 6500
        data = data[(data['Teff'] > 5000) & (data['Teff'] < 6500)]

        # 3.5 < log(g) < 5
        data = data[(data['logg'] > 3.5) & (data['logg'] < 5)]

        # 0 < error for FeH < 0.1
        data = data[(data['FeHErr'] > 0) & (data['FeHErr'] < 0.1)]

        # 0 < error for alpha/Fe < 0.05
        data = data[(data['alphFeErr'] > 0) & (data['alphFeErr'] < 0.05)]

        # 15 < g mag < 18
        data = data[(data['gpsf'] > 15) & (data['gpsf'] < 18)]

        # abs(radVel) < 100 km/s
        data = data[(abs(data['radVel']) < 100)]

    return data
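

# A short usage sketch (illustration only): compare the sizes of the full
# and cleaned catalogs using the documented ``cleaned`` flag.
def _example_sdss_sspp():
    data = fetch_sdss_sspp()
    data_clean = fetch_sdss_sspp(cleaned=True)
    print("full: %i   cleaned: %i" % (data.size, data_clean.size))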


def fetch_sdss_specgals(data_home=None, download_if_missing=True):
    """Loader for SDSS Galaxies with spectral information

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (661598,)
        record array containing spectroscopic galaxy data

    Notes
    -----
    These were compiled from the SDSS database using the following SQL
    query::

        SELECT
          G.ra, G.dec, S.mjd, S.plate, S.fiberID, --- basic identifiers
          --- basic spectral data
          S.z, S.zErr, S.rChi2, S.velDisp, S.velDispErr,
          --- some useful imaging parameters
          G.extinction_r, G.petroMag_r, G.psfMag_r, G.psfMagErr_r,
          G.modelMag_u, modelMagErr_u, G.modelMag_g, modelMagErr_g,
          G.modelMag_r, modelMagErr_r, G.modelMag_i, modelMagErr_i,
          G.modelMag_z, modelMagErr_z, G.petroR50_r, G.petroR90_r,
          --- line fluxes for BPT diagram and other derived spec. parameters
          GSL.nii_6584_flux, GSL.nii_6584_flux_err,
          GSL.h_alpha_flux, GSL.h_alpha_flux_err,
          GSL.oiii_5007_flux, GSL.oiii_5007_flux_err,
          GSL.h_beta_flux, GSL.h_beta_flux_err,
          GSL.h_delta_flux, GSL.h_delta_flux_err,
          GSX.d4000, GSX.d4000_err, GSE.bptclass,
          GSE.lgm_tot_p50, GSE.sfr_tot_p50,
          G.objID, GSI.specObjID
        INTO mydb.SDSSspecgalsDR8
        FROM SpecObj S CROSS APPLY dbo.fGetNearestObjEQ(S.ra, S.dec, 0.06) N,
             Galaxy G, GalSpecInfo GSI, GalSpecLine GSL,
             GalSpecIndx GSX, GalSpecExtra GSE
        WHERE N.objID = G.objID
          AND GSI.specObjID = S.specObjID
          AND GSL.specObjID = S.specObjID
          AND GSX.specObjID = S.specObjID
          AND GSE.specObjID = S.specObjID
          --- add some quality cuts to get rid of obviously bad measurements
          AND (G.petroMag_r > 10 AND G.petroMag_r < 18)
          AND (G.modelMag_u-G.modelMag_r) > 0
          AND (G.modelMag_u-G.modelMag_r) < 6
          AND (modelMag_u > 10 AND modelMag_u < 25)
          AND (modelMag_g > 10 AND modelMag_g < 25)
          AND (modelMag_r > 10 AND modelMag_r < 25)
          AND (modelMag_i > 10 AND modelMag_i < 25)
          AND (modelMag_z > 10 AND modelMag_z < 25)
          AND S.rChi2 < 2
          AND (S.zErr > 0 AND S.zErr < 0.01)
          AND S.z > 0.02
        --- end of query ---

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_specgals
    >>> data = fetch_sdss_specgals()
    >>> data.shape  # number of objects in dataset
    (661598,)
    >>> data.names[:5]  # first five column names
    ['ra', 'dec', 'mjd', 'plate', 'fiberID']
    >>> print(data['ra'][:3])  # first three RA values
    [ 146.71419105  146.74414186  146.62857334]
    >>> print(data['dec'][:3])  # first three declination values
    [-1.04127639 -0.6522198  -0.7651468 ]
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        fitsdata = download_with_progress_bar(DATA_URL)
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)
    return np.asarray(hdulist[1].data)
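

# A short usage sketch (illustration only): split the sample by u-r color.
# The u-r = 2.22 red/blue boundary follows Strateva et al 2001; the column
# names come from the SQL query documented above.
def _example_sdss_specgals():
    data = fetch_sdss_specgals()
    u_r = data['modelMag_u'] - data['modelMag_r']
    print("red fraction: %.2f" % np.mean(u_r > 2.22))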


def fetch_moving_objects(data_home=None, download_if_missing=True,
                         Parker2008_cuts=False):
    """Loader for SDSS moving objects datasets

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    Parker2008_cuts : bool (optional)
        If True, apply cuts on magnitudes and orbital parameters used in
        Parker et al. 2008

    Returns
    -------
    data : recarray, shape = (104686,)
        record array containing 60 values for each item; fewer rows are
        returned if Parker2008_cuts is applied

    Notes
    -----
    See http://www.astro.washington.edu/users/ivezic/sdssmoc/sdssmoc3.html
    Columns 0, 35, 45, and 56 are left out of the fetch: they are string
    parameters. Only columns with known orbital parameters are saved.

    Examples
    --------
    >>> data = fetch_moving_objects()
    >>> print(len(data))  # number of objects
    104686
    >>> u_g = data['mag_u'] - data['mag_g']
    >>> print(u_g[:5])  # first five u-g colors of the dataset
    [ 1.48999977  1.80000114  1.78000069  1.65000153  2.01000023]
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading moving object catalog from %s to %s"
              % (DATA_URL, data_home))
        zipped_buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print("uncompressing file...")
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf, dtype=ADR4_dtype)

        # Select unique sources with known orbital elements
        flag = (data['ast_flag'] == 1) & (data['ast_det_count'] == 1)
        data = data[flag]

        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    if Parker2008_cuts:
        i_z = data['mag_i'] - data['mag_z']

        flag = ((data['aprime'] >= 0.01) & (data['aprime'] <= 100)
                & (data['mag_a'] <= 0.4) & (data['mag_a'] >= -0.3)
                & (i_z <= 0.6) & (i_z >= -0.8))

        data = data[flag]

    return data
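

# A short usage sketch (illustration only): compare catalog sizes with and
# without the documented Parker et al. 2008 cuts.
def _example_moving_objects():
    data = fetch_moving_objects()
    data_cut = fetch_moving_objects(Parker2008_cuts=True)
    print("all: %i   with cuts: %i" % (len(data), len(data_cut)))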


def fetch_dr7_quasar(data_home=None, download_if_missing=True):
    """Loader for SDSS DR7 quasar catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : ndarray, shape = (105783,)
        numpy record array containing the quasar catalog

    Examples
    --------
    >>> from astroML.datasets import fetch_dr7_quasar
    >>> data = fetch_dr7_quasar()
    >>> u_g = data['mag_u'] - data['mag_g']
    >>> u_g[:3]  # first three u-g colors
    array([-0.07699966,  0.03600121,  0.10900116], dtype=float32)

    Notes
    -----
    Not all available data is extracted and saved.  The extracted columns
    are:

    sdssID, RA, DEC, redshift, mag_u, err_u, mag_g, err_g, mag_r, err_r,
    mag_i, err_i, mag_z, err_z, mag_J, err_J, mag_H, err_H, mag_K, err_K,
    specobjid

    Many of the objects are missing 2MASS photometry.

    More information at
    http://www.sdss.org/dr7/products/value_added/qsocat_dr7.html
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading DR7 quasar dataset from %s to %s"
              % (DATA_URL, data_home))
        zipped_buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf,
                          skiprows=SKIP_ROWS,
                          usecols=COLUMN_NUMBERS,
                          dtype=DR7_DTYPE)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
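

# A short usage sketch (illustration only): summarize the redshift column,
# which is listed among the extracted columns in the Notes above.
def _example_dr7_quasar():
    data = fetch_dr7_quasar()
    print("%i quasars, median z = %.2f"
          % (len(data), np.median(data['redshift'])))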


def fetch_sdss_S82standards(data_home=None, download_if_missing=True,
                            crossmatch_2mass=False):
    """Loader for SDSS stripe82 standard star catalog

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : bool, optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    crossmatch_2mass : bool, optional, default=False
        If True, return the standard star catalog cross-matched with 2MASS
        magnitudes

    Returns
    -------
    data : ndarray, shape = (313859,)
        record array containing sdss standard stars (see notes below)

    Notes
    -----
    Information on the data can be found at
    http://www.astro.washington.edu/users/ivezic/sdss/catalogs/stripe82.html
    Data is described in Ivezic et al. 2007 (Astronomical Journal, 134, 973).
    Columns are as follows:

      RA                right ascension of source (degrees)
      DEC               declination of source (degrees)
      RArms             rms of right ascension (arcsec)
      DECrms            rms of declination (arcsec)
      Ntot              total number of epochs
      A_r               SFD ISM extinction (mags)

    for each band in (u g r i z):

      Nobs_<band>       number of observations in this band
      mmed_<band>       median magnitude in this band
      mmu_<band>        mean magnitude in this band
      msig_<band>       standard error on the mean
                        (1.25 times larger for median)
      mrms_<band>       root-mean-square scatter
      mchi2_<band>      chi2 per degree of freedom for mean magnitude

    For 2MASS, the following columns are added:

      ra2MASS           2MASS right ascension
      dec2MASS          2MASS declination
      J                 J-band magnitude
      Jerr              J-band error
      H                 H-band magnitude
      Herr              H-band error
      K                 K-band magnitude
      Kerr              K-band error
      theta             difference between SDSS and 2MASS position (arcsec)

    Examples
    --------
    >>> data = fetch_sdss_S82standards()
    >>> u_g = data['mmed_u'] - data['mmed_g']
    >>> print(u_g[:5])
    [-22.23500061   1.34900093   1.43799973   2.08200073 -23.03800011]

    References
    ----------
    Ivezic et al. AJ 134:973 (2007)
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    if crossmatch_2mass:
        archive_file = os.path.join(data_home, ARCHIVE_FILE_2MASS)
        data_url = DATA_URL_2MASS
        kwargs = dict(dtype=DTYPE_2MASS)
    else:
        archive_file = os.path.join(data_home, ARCHIVE_FILE)
        data_url = DATA_URL
        kwargs = dict(usecols=COLUMNS, dtype=DTYPE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("downloading stripe82 standard star dataset from %s to %s"
              % (data_url, data_home))

        zipped_buf = download_with_progress_bar(data_url, return_buffer=True)
        gzf = GzipFile(fileobj=zipped_buf, mode='rb')
        print("uncompressing file...")
        extracted_buf = StringIO(gzf.read())
        data = np.loadtxt(extracted_buf, **kwargs)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
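

# A short usage sketch (illustration only): the cross-matched catalog adds
# 2MASS photometry; compare the sizes of the two versions.
def _example_sdss_S82standards():
    data = fetch_sdss_S82standards()
    data_2mass = fetch_sdss_S82standards(crossmatch_2mass=True)
    print("SDSS only: %i   with 2MASS: %i" % (len(data), len(data_2mass)))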


def fetch_imaging_sample(data_home=None, download_if_missing=True):
    """Loader for SDSS Imaging sample data

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.
    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (330753,)
        record array containing imaging data

    Examples
    --------
    >>> from astroML.datasets import fetch_imaging_sample
    >>> data = fetch_imaging_sample()
    >>> data.shape  # number of objects in dataset
    (330753,)
    >>> print(data.names[:5])  # names of the first five columns
    ['ra', 'dec', 'run', 'rExtSFD', 'uRaw']
    >>> print(data['ra'][:2])
    [ 0.265165  0.265413]
    >>> print(data['dec'][:2])
    [-0.444861 -0.62201 ]

    Notes
    -----
    This data was selected from the SDSS database using the following SQL
    query::

        SELECT
          round(p.ra,6) as ra, round(p.dec,6) as dec,
          p.run,                               --- comments are preceded by ---
          round(p.extinction_r,3) as rExtSFD,  --- r band extinction from SFD
          round(p.modelMag_u,3) as uRaw,       --- ISM-uncorrected model mags
          round(p.modelMag_g,3) as gRaw,       --- rounding up model magnitudes
          round(p.modelMag_r,3) as rRaw,
          round(p.modelMag_i,3) as iRaw,
          round(p.modelMag_z,3) as zRaw,
          round(p.modelMagErr_u,3) as uErr,    --- errors are important!
          round(p.modelMagErr_g,3) as gErr,
          round(p.modelMagErr_r,3) as rErr,
          round(p.modelMagErr_i,3) as iErr,
          round(p.modelMagErr_z,3) as zErr,
          round(p.psfMag_u,3) as psfRaw,       --- psf magnitudes
          round(p.psfMag_g,3) as psfRaw,
          round(p.psfMag_r,3) as psfRaw,
          round(p.psfMag_i,3) as psfRaw,
          round(p.psfMag_z,3) as psfRaw,
          round(p.psfMagErr_u,3) as psfuErr,
          round(p.psfMagErr_g,3) as psfgErr,
          round(p.psfMagErr_r,3) as psfrErr,
          round(p.psfMagErr_i,3) as psfiErr,
          round(p.psfMagErr_z,3) as psfzErr,
          p.type,                    --- tells if a source is resolved or not
          (case when (p.flags & '16') = 0 then 1 else 0 end) as ISOLATED
        INTO mydb.SDSSimagingSample
        FROM PhotoTag p
        WHERE
            --- 10x2 sq.deg.
            p.ra > 0.0 and p.ra < 10.0 and p.dec > -1 and p.dec < 1
            --- resolved and unresolved sources
            and (p.type = 3 OR p.type = 6) and
            --- '4295229440' is magic code for no
            --- DEBLENDED_AS_MOVING or SATURATED objects
            (p.flags & '4295229440') = 0 and
            --- PRIMARY objects only, which implies
            --- !BRIGHT && (!BLENDED || NODEBLEND || nchild == 0)]
            p.mode = 1 and
            --- adopted faint limit (same as about SDSS limit)
            p.modelMag_r < 22.5
        --- the end of query
    """
    # pyfits is an optional dependency: don't import globally
    import pyfits

    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        fitsdata = download_with_progress_bar(DATA_URL)
        # the FITS payload is binary: write it in binary mode
        open(archive_file, 'wb').write(fitsdata)

    hdulist = pyfits.open(archive_file)
    return np.asarray(hdulist[1].data)
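

# A short usage sketch (illustration only): split point sources from
# resolved sources via the 'type' column (SDSS convention: 6 = star,
# 3 = galaxy, matching the type cut in the query above).
def _example_imaging_sample():
    data = fetch_imaging_sample()
    stars = data[data['type'] == 6]
    galaxies = data[data['type'] == 3]
    print("stars: %i   galaxies: %i" % (len(stars), len(galaxies)))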