def validate(param_queue, mse_queue, train_data, train_r1, val_data, val_r1):
    while True:
        param = param_queue.get()
        if param is None:
            break
        index = param[0]
        g = param[1]
        c = param[2]
        svr = SVR(kernel='rbf', C = c, gamma = g)
        svr.fit(train_data, train_r1)
        val_predict = svr.predict(val_data)
        mse = np.mean((val_predict - val_r1)**2)
        mse_queue.put((index, mse))
###############################################################################
# Generate sample data
import numpy as np

X = np.sort(5*np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()

###############################################################################
# Add noise to targets
y[::5] += 3*(0.5 - np.random.rand(8))

###############################################################################
# Fit regression model
from scikits.learn.svm import SVR

svr_rbf = SVR(kernel='rbf', C=1e4, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e4)
svr_poly = SVR(kernel='poly', C=1e4, degree=2)
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)

###############################################################################
# look at the results
import pylab as pl
pl.scatter(X, y, c='k', label='data')
pl.hold('on')
pl.plot(X, y_rbf, c='g', label='RBF model')
pl.plot(X, y_lin, c='r', label='Linear model')
pl.plot(X, y_poly, c='b', label='Polynomial model')
pl.xlabel('data')
###############################################################################
# Generate sample data
import numpy as np

X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()

###############################################################################
# Add noise to targets
y[::5] += 3 * (0.5 - np.random.rand(8))

###############################################################################
# Fit regression model
from scikits.learn.svm import SVR

svr_rbf = SVR(kernel='rbf', C=1e4, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e4)
svr_poly = SVR(kernel='poly', C=1e4, degree=2)
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)

###############################################################################
# look at the results
import pylab as pl
pl.scatter(X, y, c='k', label='data')
pl.hold('on')
pl.plot(X, y_rbf, c='g', label='RBF model')
pl.plot(X, y_lin, c='r', label='Linear model')
pl.plot(X, y_poly, c='b', label='Polynomial model')
pl.xlabel('data')
    process[idx].start()

index = 0
for g in gamma:
    for c in C:
        param_queue.put((index, g, c))
        index += 1

for idx in xrange(n_process):
    param_queue.put(None)

val_mse = [None] * len(gamma) * len(C)
for i in xrange(len(gamma) * len(C)):
    mse_entry = mse_queue.get()
    val_mse[mse_entry[0]] = mse_entry[1]
    if i % 1 == 0:
        print 'processed %d' %(i)

best_idx = val_mse.index(min(val_mse))
best_gamma = gamma[best_idx / len(C)]
best_c = C[best_idx % len(C)]
print best_idx, best_gamma, best_c

svr = SVR(kernel='rbf', C = best_c, gamma = best_gamma)
svr.fit(train_data, train_r1)
test_predict = svr.predict(test_data)
mse_r1 = np.mean((test_r1 - test_predict)**2)
mse_r2 = np.mean((test_r2 - test_predict)**2)
print pearsonr(test_r1,test_predict)
print 'mse_r1: %.3f mse_r2: %.3f' %(mse_r1, mse_r2)
Beispiel #5
0
def keptest(infile,outfile,datacol,ploterr,errcol,quality,
	    lcolor,lwidth,fcolor,falpha,labelsize,ticksize,
	    xsize,ysize,fullrange,plotgrid,verbose,logfile,status): 

# log the call 

    hashline = '----------------------------------------------------------------------------'
    kepmsg.log(logfile,hashline,verbose)
    call = 'KEPTEST -- '
    call += 'infile='+infile+' '
    call += 'outfile='+outfile+' '
    call += 'datacol='+datacol+' '
    perr = 'n'
    if (ploterr): perr = 'y'
    call += 'ploterr='+perr+ ' '
    call += 'errcol='+errcol+' '
    qual = 'n'
    if (quality): qual = 'y'
    call += 'quality='+qual+ ' '
    call += 'lcolor='+str(lcolor)+' '
    call += 'lwidth='+str(lwidth)+' '
    call += 'fcolor='+str(fcolor)+' '
    call += 'falpha='+str(falpha)+' '
    call += 'labelsize='+str(labelsize)+' '
    call += 'ticksize='+str(ticksize)+' '
    call += 'xsize='+str(xsize)+' '
    call += 'ysize='+str(ysize)+' '
    frange = 'n'
    if (fullrange): frange = 'y'
    call += 'fullrange='+frange+ ' '
    pgrid = 'n'
    if (plotgrid): pgrid = 'y'
    call += 'plotgrid='+pgrid+ ' '
    chatter = 'n'
    if (verbose): chatter = 'y'
    call += 'verbose='+chatter+' '
    call += 'logfile='+logfile
    kepmsg.log(logfile,call+'\n',verbose)

# start time

    kepmsg.clock('KEPTEST started at',logfile,verbose)

# test log file

    logfile = kepmsg.test(logfile)

# open input file

    if status == 0:
        struct, status = kepio.openfits(infile,'readonly',logfile,verbose)
    if status == 0:
        tstart, tstop, bjdref, cadence, status = kepio.timekeys(struct,infile,logfile,verbose,status)

# read table structure

    if status == 0:
	table, status = kepio.readfitstab(infile,struct[1],logfile,verbose)

# read table columns

    if status == 0:
        intime, status = kepio.readtimecol(infile,table,logfile,verbose)
        #intime += bjdref
	indata, status = kepio.readfitscol(infile,table,datacol,logfile,verbose)
	if (ploterr):
            indataerr, status = kepio.readfitscol(infile,table,errcol,logfile,verbose)
    if status == 0:
        gaps = zeros(len(indata))

# read table quality column

    if status == 0 and quality:
        try:
            qualtest = table.field('SAP_QUALITY')
        except:
            message = 'ERROR -- KEPTEST: no SAP_QUALITY column found in file ' + infile
            message += '. Use keptest quality=n'
            status = kepmsg.err(logfile,message,verbose)
    if status == 0 and quality:
        gaps, status = kepio.readfitscol(infile,table,'SAP_QUALITY',logfile,verbose)       

# close infile

    if status == 0:
	status = kepio.closefits(struct,logfile,verbose)

# remove infinities and bad data

    if status == 0:
	barytime = []; data = []; dataerr = []
        if 'ap_raw' in datacol or 'ap_corr' in datacol:
            cadenom = cadence
        else:
            cadenom = 1.0
	for i in range(len(intime)):
            if numpy.isfinite(indata[i]) and indata[i] != 0.0 and gaps[i] == 0:
                barytime.append(intime[i])
                data.append(indata[i] / cadenom)
                if (ploterr):
                    dataerr.append(indataerr[i])
	barytime = numpy.array(barytime,dtype='float64')
	data = numpy.array(data,dtype='float64')
	if (ploterr):
            dataerr = numpy.array(dataerr,dtype='float64')

# clean up x-axis unit

    if status == 0:
	barytime0 = float(int(tstart / 100) * 100.0)
	barytime -= barytime0
        xlab = 'BJD $-$ %d' % barytime0

# clean up y-axis units

        try:
            nrm = len(str(int(data.max())))-1
        except:
            nrm = 0
	data = data / 10**nrm
	ylab1 = '10$^%d$ e$^-$ s$^{-1}$' % nrm

# data limits

	xmin = barytime.min()
	xmax = barytime.max()
	ymin = data.min()
	ymax = data.max()
	xr = xmax - xmin
	yr = ymax - ymin
	data[0] = ymin - yr * 2.0
	data[-1] = ymin - yr * 2.0
        if fullrange:
            data[0] = 0.0
            data[-1] = 0.0

# define plot formats

        try:
            rc('text', usetex=True)
            rc('font',**{'family':'sans-serif','sans-serif':['sans-serif']})
            params = {'backend': 'png',
                      'axes.linewidth': 2.5,
                      'axes.labelsize': labelsize,
                      'axes.font': 'sans-serif',
                      'axes.fontweight' : 'bold',
                      'text.fontsize': 12,
                      'legend.fontsize': 12,
                      'xtick.labelsize': ticksize,
                      'ytick.labelsize': ticksize}
            pylab.rcParams.update(params)
        except:
            pass

# define size of plot on monitor screen

	pylab.figure(figsize=[xsize,ysize])

# delete any fossil plots in the matplotlib window

        pylab.clf()

	# position axes inside the plotting window

	ax = pylab.axes([0.06,0.1,0.93,0.88])

# force tick labels to be absolute rather than relative

        pylab.gca().xaxis.set_major_formatter(pylab.ScalarFormatter(useOffset=False))
        pylab.gca().yaxis.set_major_formatter(pylab.ScalarFormatter(useOffset=False))

# rotate y labels by 90 deg

        labels = ax.get_yticklabels()
        setp(labels, 'rotation', 90, fontsize=12)
	
# plot data time series as an unbroken line, retaining data gaps

	ltime = []; ldata = []; ldataerr = []; ldatagaps = []
        dt = 0

	# SVR 
	svr_rbf = SVR(kernel='rbf', C=1, gamma=0.1)
	svr_lin = SVR(kernel='linear', C=1)
	svr_poly = SVR(kernel='poly', C=1, degree=2)
	svr_ltime = []; svr_ldata = []


	for i in range(len(indata)):
            if i > 0:
		if numpy.isfinite(indata[i]) and indata[i] != 0.0 : 
			# print intime[i], " ", indata[i]
			ltime.append(intime[i])
			ldata.append(indata[i])
			svr_ltime.append([intime[i]])

	ltime = array(ltime, dtype=float64)
	ldata = array(ldata, dtype=float64)

	if len(ldata) > 0 and len(ltime) > 0 :
		pylab.scatter (ltime, ldata, s=1, color=lcolor, label='Data:Input lightcurve')

	svr_ltime = array(svr_ltime, dtype='float64')
	svr_ldata = array(ldata, dtype='float64')

	svr_ldata_rbf = svr_rbf.fit(svr_ltime, svr_ldata).predict(svr_ltime)

	## Get the transits!
	# Identify the difference of data min. and the regression line
	# = An approximate initial dip value.
	
	ldata_min = min(ldata)
	ldata_min_i = ldata.tolist().index(ldata_min)
	fluxdip = svr_ldata_rbf[ldata_min_i] - ldata_min
	# fluxthresh = (svr_ldata_rbf[ldata_min_i] + ldata_min ) / 2.0
	print "ldata min = ", ldata_min, "fluxdip =", fluxdip
	thresh_x = []; thresh_y = [];

	# Sequentially scan the inputs, look for y-points below the 
	# initial mean. Group the points
	i = 0
	while i < len(ldata):
		# print intime[i], " ", indata[i]
		fluxmin = fluxthresh = svr_ldata_rbf[i] - fluxdip/2.0
		if ldata[i] < fluxthresh:
			thresh_y.append(fluxthresh); thresh_x.append(ltime[i])
		# Identify the local min, calculate difference with regression line.
			while i < len(ldata) and ldata[i] < fluxthresh :
				if ldata[i] < fluxmin:
					fluxmin = ldata[i]
					fluxmin_i = i
				i += 1
			
		# We got the local min, now plot the line,
		# converge the dip value with the newly calculated one.	
			pylab.plot([ ltime[fluxmin_i], ltime[fluxmin_i] ], 
				[ ldata[fluxmin_i], svr_ldata_rbf[fluxmin_i] ], 
				'r-', linewidth=1)
			fluxdip = (fluxdip + svr_ldata_rbf[fluxmin_i] - fluxmin)/2.0
		i += 1


	pylab.plot(thresh_x, thresh_y, c='c', label='Adapted transit threshold')
	pylab.scatter(thresh_x, thresh_y, c='k', s=1)
	pylab.plot(svr_ltime, svr_ldata_rbf, c='g', label='Cum. RBF model')


	if (ploterr):
            ldataerr = numpy.array(ldataerr,dtype='float32')


# plot labels

	pylab.xlabel(xlab, {'color' : 'k'})
        try:
            pylab.ylabel(ylab1, {'color' : 'k'})
        except:
            ylab1 = '10**%d e-/s' % nrm
            pylab.ylabel(ylab1, {'color' : 'k'})

# make grid on plot

	if plotgrid: pylab.grid()

# paint plot into window
	pylab.legend()

        pylab.draw()

# save plot to file

    if status == 0 and outfile.lower() != 'none':
	pylab.savefig(outfile)