예제 #1
0
def load_astro_dataset():
    """
    Build a quasar-vs-star classification set from photometric colors.

    Quasars are taken from ``fetch_dr7_quasar()`` and stars from
    ``fetch_sdss_sspp()``; only every fifth row of each catalog is kept.
    Every object is described by four adjacent-band magnitude differences
    (u-g, g-r, r-i, i-z), read from the ``mag_*`` fields of the quasar
    catalog and the ``*psf`` fields of the star catalog.

    Returns
    -------
    X : float array, shape (Nqso + Nstars, 4)
        Color matrix.  The first ``Nqso`` rows are quasars, the remaining
        ``Nstars`` rows are stars.
    y : int array, shape (Nqso + Nstars,)
        Class labels aligned with the rows of ``X``: +1 for quasars,
        -1 for stars.
    """
    # Keep every fifth object of each catalog
    quasars = fetch_dr7_quasar()[::5]
    stars = fetch_sdss_sspp()[::5]

    Nqso = len(quasars)
    Nstars = len(stars)

    # Field names ordered u, g, r, i, z so that neighbours form one color
    quasar_bands = ('mag_u', 'mag_g', 'mag_r', 'mag_i', 'mag_z')
    star_bands = ('upsf', 'gpsf', 'rpsf', 'ipsf', 'zpsf')

    X = empty((Nqso + Nstars, 4), dtype=float)
    for col in range(4):
        X[:Nqso, col] = (quasars[quasar_bands[col]]
                         - quasars[quasar_bands[col + 1]])
        X[Nqso:, col] = (stars[star_bands[col]]
                         - stars[star_bands[col + 1]])

    # Labels follow the row layout of X: quasars first, then stars
    y = zeros(Nqso + Nstars, dtype=int)
    y[:Nqso] = 1
    y[Nqso:] = -1

    return X, y
예제 #2
0
------------------------------------------
This example shows how to fetch the data from the Segue Stellar Parameter
Pipeline (SSPP), and plot the temperature and surface gravity of a selection
of stars.
"""
# Author: Jake VanderPlas <*****@*****.**>
# License: BSD
#   The figure produced by this code is published in the textbook
#   "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
#   For more information, see http://astroML.github.com
from matplotlib import pyplot as plt
from astroML.datasets import fetch_sdss_sspp

#------------------------------------------------------------
# Load the SSPP catalog, keeping only its first 10000 rows
data = fetch_sdss_sspp()[:10000]

# Magnitude cut: retain rows with 15 < rpsf < 19.  Note that `rpsf` stays
# bound to the values taken *before* the cut is applied.
rpsf = data['rpsf']
data = data[(rpsf > 15) & (rpsf < 19)]

# Columns to be plotted (surface gravity and temperature)
logg, Teff = data['logg'], data['Teff']

#------------------------------------------------------------
# Create the axes the data will be drawn on
ax = plt.axes()
예제 #3
0
# -*- coding: utf-8 -*-
"""
Created on Tue Aug  7 15:30:22 2018

@author: zyv57124
"""

import numpy as np
from astroML.datasets import fetch_LINEAR_geneva
from astroML.datasets import fetch_dr7_quasar
from astroML.datasets import fetch_sdss_sspp

# Fetch both catalogs first (quasars, then stars) ...
quasars, stars = fetch_dr7_quasar(), fetch_sdss_sspp()

# ... then write each one to a .npy file; the relative file names are
# resolved against the current working directory.
np.save(file='quasars.npy', arr=quasars)
np.save(file='stars.npy', arr=stars)
예제 #4
0
from astroML.datasets import fetch_sdss_sspp
from astroML.decorators import pickle_results
from astroML.plotting.tools import draw_ellipse

#----------------------------------------------------------------------
# Apply the textbook-wide matplotlib text settings.  usetex=True requests
# LaTeX-rendered fonts, which may raise an error when LaTeX is not
# installed on the system; in that case set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(usetex=True, fontsize=8)

#------------------------------------------------------------
# Get the Segue Stellar Parameters Pipeline data (cleaned=True)
data = fetch_sdss_sspp(cleaned=True)

# One row per object with columns (FeH, alphFe); only every fifth row is
# kept for speed
X = np.vstack((data['FeH'], data['alphFe'])).T[::5]

#------------------------------------------------------------
# Compute GMM models & AIC/BIC
# N holds the integers 1 through 13
N = np.arange(1, 14)


@pickle_results("GMM_metallicity.pkl")
def compute_GMM(N, covariance_type='full', n_iter=1000):
    models = [None for n in N]
    for i in range(len(N)):
        print N[i]
예제 #5
0
#----------------------------------------------------------------------
# Apply the textbook-wide matplotlib text settings.  usetex=True requests
# LaTeX-rendered fonts, which may raise an error when LaTeX is not
# installed on the system; in that case set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(usetex=True, fontsize=8)

#------------------------------------------------------------
# Fetch data and split into training and test samples
from astroML.datasets import fetch_dr7_quasar
from astroML.datasets import fetch_sdss_sspp

# Load each catalog and keep only every fifth row for speed
quasars = fetch_dr7_quasar()[::5]
stars = fetch_sdss_sspp()[::5]

# Allocate the color matrix: one row per object, four colors per row
Nqso, Nstars = len(quasars), len(stars)
X = np.empty((Nqso + Nstars, 4), dtype=float)

# The first Nqso rows hold the quasar colors u-g, g-r, r-i and i-z
X[:Nqso] = np.column_stack([
    quasars['mag_u'] - quasars['mag_g'],
    quasars['mag_g'] - quasars['mag_r'],
    quasars['mag_r'] - quasars['mag_i'],
    quasars['mag_i'] - quasars['mag_z'],
])
예제 #6
0
from sklearn import preprocessing

from astroML.datasets import fetch_sdss_sspp

#----------------------------------------------------------------------
# Apply the textbook-wide matplotlib text settings.  usetex=True requests
# LaTeX-rendered fonts, which may raise an error when LaTeX is not
# installed on the system; in that case set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(usetex=True, fontsize=8)

#------------------------------------------------------------
# Seed NumPy's global random generator, then load the data
np.random.seed(0)
data = fetch_sdss_sspp(cleaned=True)

# Drop additional strange outliers: rows having both alphFe > 0.4 and
# FeH > -0.3 are removed, every other row is kept
data = data[np.logical_not((data['alphFe'] > 0.4) & (data['FeH'] > -0.3))]

# One row per object with columns (FeH, alphFe)
X = np.vstack((data['FeH'], data['alphFe'])).T

#----------------------------------------------------------------------
# Compute clustering with MeanShift
#
# The clustering uses scaled data, because MeanShift finds circular
# clusters

X_scaled = preprocessing.scale(X)

# The following bandwidth can be automatically detected using
# the routine estimate_bandwidth().  Because bandwidth estimation