-
Notifications
You must be signed in to change notification settings - Fork 1
/
Transitmaker mpfits(TLM)NR.py
242 lines (221 loc) · 15.8 KB
/
Transitmaker mpfits(TLM)NR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#Part of the unbound (free parameters) fit data pipeline
#This is the second stage in the data processing pipeline: it fits an analytical lightcurve to each transit
#This script starts by loading the stellar and planetary parameters and using them to calculate an initial fit: each fit requires Teff, Logg, Metalicity, planetary/stellar radius ratio, impact parameter and the transit duration
# - it is these values that are iterated over in order to find the best fit.
# Because of the way that this fit works, all the parameters are free and so it can produce the best possible fit (given the limitations of mpfit) for each transit, i.e. it does not worry about ensuring a realistic host star
#This script uses mpfit for the iteration, which means that errors on each fitted parameter are returned. It also means that the quality of each fit is judged by its chi^2 value
#This script also saves the fitted and original stellar parameters in new structured arrays, along with a separate file which contains the analytical (fitted) lightcurve
#NOTE: The way that the z values are calculated is very simple, but it is geometrically and physically accurate.
#NOTE: This only differs from its partner script in that it will NOT reprocess any stars which already have saved analytical transit lightcurves - it is used when dealing with large samples
#The code can be interrupted (command-c) and then resumed with very little performance loss - perfect for very large samples
import shutil
import numpy as np
import transit
from mpfit import mpfit
import matplotlib
matplotlib.use('PDF')
import matplotlib.pyplot as plt
import os
import warnings
from scipy.interpolate import griddata
warnings.simplefilter('ignore', RuntimeWarning) #The analytical lightcurve generator makes runtime errors but still works - so might as well suppress the errors
def transittest():  # was used to test the analytical transit generator - now not used
    """Plot transit.occultquad for three coefficient pairs and save the figure as Test.pdf.

    The model is evaluated at 200 evenly spaced z values between 0 and 1.4
    with a fixed second argument of 0.1.
    """
    separations = np.linspace(0, 1.4, 200)
    coefficient_pairs = ([0., 0.], [1., 0.], [2., -1.])
    plt.figure()
    for pair in coefficient_pairs:
        plt.plot(separations, transit.occultquad(separations, 0.1, pair))
    plt.savefig('Test.pdf')
    return
def zandtcalculator(tvalues, impactparameter, transitduration, R_p_over_R_s):
    """Map times from mid-transit onto z values for the transit model.

    z rises linearly with |t|, from ``impactparameter`` at t = 0 up to
    ``1 + R_p_over_R_s`` at |t| = ``transitduration / 2``.

    Parameters:
        tvalues: sequence or array of times measured from mid-transit.
        impactparameter: minimum z value (already a fraction of the stellar radius).
        transitduration: full transit duration, in the same units as ``tvalues``.
        R_p_over_R_s: planet-to-star radius ratio.

    Returns:
        A new float array with one z value per entry of ``tvalues``.
    """
    zmin = impactparameter
    zmax = 1 + R_p_over_R_s
    # Divide by 2.0 so that an integer duration is not floor-divided under Python 2.
    tmax = transitduration / 2.0
    # Gradient of z with respect to |t|.
    z_t = (zmax - zmin) / tmax
    # Always work in floating point: writing the results into a copy of an
    # integer-typed time array would silently truncate them.
    times = np.asarray(tvalues, dtype=float)
    # z cannot be negative, hence the absolute value.
    return np.abs(times * z_t) + zmin
def stellarextractor(KeplerID, Stellardataarray):
    """Return (Teff, Logg, Metalicity, StellarRadius) for the given Kepler ID.

    Parameters:
        KeplerID: identifier compared against each record's 'KeplerID' entry.
        Stellardataarray: iterable of records indexable by the keys 'KeplerID',
            'Teff', 'Logg', 'Metalicity' and 'StellarRadius'.

    Returns:
        A 4-tuple taken from the first record whose 'KeplerID' matches.

    Raises:
        KeyError: if no record matches ``KeplerID``.
    """
    for line in Stellardataarray:
        if line['KeplerID'] == KeplerID:
            # The first match wins; later duplicates are ignored.
            return line['Teff'], line['Logg'], line['Metalicity'], line['StellarRadius']
    # Fail with a message naming the missing ID rather than an unbound-variable error.
    raise KeyError('No stellar data found for Kepler ID {}'.format(KeplerID))
def transitplot(t, f, KeplerID, planetnumber):
    """Plot an analytical transit lightcurve and save it as a PDF.

    Parameters:
        t: x values, labelled as time from mid-transit in days.
        f: y values, labelled as normalised flux.
        KeplerID, planetnumber: used only to build the output file name
            'kepler-<KeplerID>-planet-<planetnumber>-analyticaltransitlc.pdf'.
    """
    fig = plt.figure()  # dedicated figure for this plot
    plt.plot(t, f)
    plt.xlabel('Time From Mid-Transit (Days)')
    plt.ylabel('Normalised Flux')
    plt.title('Analytical Transit Lightcuves')
    plt.savefig('kepler-{0}-planet-{1}-analyticaltransitlc.pdf'.format(KeplerID, planetnumber))
    # Close the figure so that repeated calls do not accumulate open figures.
    plt.close(fig)
    return
def comtransitplot(t, f, kt, kf, KeplerID, planetnumber, KOI):
    """Plot the Kepler data and the analytical lightcurve together and save a PDF.

    Parameters:
        t, f: x and y values of the analytical lightcurve (drawn as a red line).
        kt, kf: x and y values of the Kepler data (drawn as blue '+' markers).
        KeplerID, planetnumber: used only to build the output file name
            'kepler-<KeplerID>-planet-<planetnumber>-comptranlc.pdf'.
        KOI: accepted for interface compatibility; not used by this function.
    """
    std = np.std(kf)
    fig = plt.figure()  # dedicated figure for this plot
    # Plot each series separately with its own label so the legend gets one
    # correctly named entry per series.
    plt.plot(kt, kf, 'b+', label='Kepler')
    plt.plot(t, f, 'r-', label='Analytical')
    plt.legend()
    plt.xlabel('Time From Mid-Transit (Days)')
    plt.ylabel('Normalised Flux')
    # Show +-6 standard deviations around unity; leave the axis autoscaled when
    # the scatter is zero or undefined, as the limits would then be degenerate.
    if np.isfinite(std) and std > 0:
        plt.ylim([(1 - 6 * std), (1 + 6 * std)])
    plt.title('Kepler and Analytical Transit Lightcurves')
    plt.savefig('kepler-{0}-planet-{1}-comptranlc.pdf'.format(KeplerID, planetnumber))
    # Close the figure so that looping over many planets does not accumulate open figures.
    plt.close(fig)
    return
def transitcalcloop():
    """Fit and save an analytical transit lightcurve for every planet not yet processed.

    Reads 'plandataarray.npy' and 'Stellardataarray.npy' from the current
    directory. A planet is skipped when its
    'kepler-<KeplerID>-planet-<planetnumber>-analyticaltransitlc.npy' file already
    exists. For every other planet the fit is run, a comparison plot is written,
    and the analytical lightcurve, the covariance array and the fitted
    planetary/stellar parameters are saved.
    """
    plandataarray = np.load('plandataarray.npy') #load the planetary data array
    stellardataarray = np.load('Stellardataarray.npy') #load the stellar data array (no longer need to run LDCcalc beforehand)
    for line in plandataarray: #now loop through the planetary data array, in which line represents a single planet
        print 'For Kepler {}, planet {}:'.format(line['KeplerID'], line['planetnumber'])
        #the saved analytical lightcurve doubles as the "already processed" marker
        if os.path.exists('kepler-{0}-planet-{1}-analyticaltransitlc.npy'.format(line['KeplerID'], line['planetnumber'])) == False:
            Teff, Logg, Metalicity, Rheader = stellarextractor(line['KeplerID'], stellardataarray) #extract the stellar parameters for this star
            Keplerlc = np.load('kepler-{0}-planet-{1}-trandata.npy'.format(line['KeplerID'], line['planetnumber'])) #extract the kepler transit data
            tvalues = np.sort(Keplerlc['Foldtime']) #create an array of times associated with each data point and sort them
            Keplertransit = np.sort(Keplerlc, order = 'Foldtime') #sort the kepler data in the same way
            sigma = sigmacalc(Keplertransit)
            impactpar, T_dur, R_plan, Teff_fit, Logg_fit, Metalicity_fit, bestfitarray, errors, covar = zandtfit(line, Keplerlc, Teff, Logg, Metalicity, sigma) #run the fit for this transit
            #zandtfit returns 10**(fitted value) as Metalicity_fit, so it is converted back to log10 here
            gamma1, gamma2, = gammainterpolator(Teff_fit, Logg_fit, np.log10(Metalicity_fit))
            zvalues = zandtcalculator(tvalues, impactpar, T_dur, R_plan)
            f = transit.occultquad(zvalues, R_plan, [gamma1, gamma2]) #calculate/find the analytical transit lightcurve
            #transitplot(tvalues, f, line['KeplerID'], line['planetnumber']) #plot the analytical transit lightcurve
            comtransitplot(tvalues, f, Keplertransit['Foldtime'], Keplertransit['Flux'], line['KeplerID'], line['planetnumber'], line['KOI']) #plot both the kepler and analytical transit data on the same graph
            analyticaltransitlc = np.vstack((tvalues, f)) #row 0 holds the times, row 1 the model flux
            chi, chireduced = chicalc(Keplertransit, analyticaltransitlc, sigma, T_dur) #the returned values are not used below; chicalc prints them
            #print analyticaltransitlc
            print 'The new impactparameter is {}+-{}, the new duration is {}+-{} and the new radius is {}+-{}'. format(impactpar, errors[0], T_dur, errors[1], R_plan, errors[2])
            np.save('kepler-{0}-planet-{1}-analyticaltransitlc'.format(line['KeplerID'], line['planetnumber']), analyticaltransitlc) #save the analytical transit
            np.save('kepler-{0}-planet-{1}-covariance'.format(line['KeplerID'], line['planetnumber']), covar) #save the covariance array
            saveplandata(line, impactpar, T_dur, R_plan, line['planetnumber'], line['KeplerID'], errors)
            savefittedstellardata(line['KeplerID'], Teff, Teff_fit, errors[3], Logg, Logg_fit, errors[4], Metalicity, Metalicity_fit, errors[5], Rheader, gamma1, gamma2, line['planetnumber'])
        else: print 'Already Done'
    return
def saveplandata(tdata, impactpar, translength, R_plan, plannumber, KeplerID, errors):
    """Append one planet's fitted parameters to 'plandataarray(fitted).npy'.

    The row combines the fitted impact parameter, transit duration and radius
    ratio (with errors[0], errors[1] and errors[2] as their respective errors)
    with the entries copied from ``tdata``. The file is created on first use.
    ``KeplerID`` is accepted but the stored ID is read from ``tdata['KeplerID']``.
    """
    #data types for the planetary array
    record_fields = [
        ('KeplerID', int),
        ('KOI', float),
        ('Transitduration', float),
        ('Transitsuration_error', float),
        ('planetnumber', int),
        ('Tfirsttran', float),
        ('Orbitalperiod', float),
        ('a/R_star', float),
        ('R_plan/R_star', float),
        ('R_plan_error', float),
        ('Impactpar', float),
        ('Impactpar_error', float),
        ('semimajoraxis', float),
    ]
    #values to be stored, in the same order as the fields above
    record = (
        tdata['KeplerID'],
        tdata['KOI'],
        translength,
        errors[1],
        plannumber,
        tdata['Tfirsttran'],
        tdata['Orbitalperiod'],
        tdata['a/R_star'],
        R_plan,
        errors[2],
        impactpar,
        errors[0],
        tdata['semimajoraxis'],
    )
    new_row = np.array([record], dtype=record_fields)
    if os.path.exists('plandataarray(fitted).npy'):
        #extend the previously saved array with the new row
        previous_rows = np.load('plandataarray(fitted).npy')
        combined = np.concatenate((previous_rows, new_row), axis=0)
    else:
        #first planet: the new row is the whole array
        combined = np.copy(new_row)
    np.save('plandataarray(fitted)', combined) #np.save adds the .npy extension
    return
def savefittedstellardata(KeplerID, Teff_kep, Teff_fit, Teff_error, Logg_kep, Logg_fit, Logg_error, Metalicity_kep, Metalicity_fit, Metalicity_error, Rheader, gamma1, gamma2, planetnumber):
    """Append one row of original and fitted stellar values to 'Stellardataarray(fitted).npy'.

    The arguments are stored in the order given, one per field. The file is
    created on first use and extended by one row on every later call.
    """
    #data types for the stellar data array
    record_fields = [
        ('KeplerID', int),
        ('Teff_kep', float),
        ('Teff_fit', float),
        ('Teff_error', float),
        ('Logg_kep', float),
        ('Logg_fit', float),
        ('Logg_error', float),
        ('Metalicity_kep', float),
        ('Metalicity_fit', float),
        ('Metalicity_error', float),
        ('Rheader', float),
        ('gamma1', float),
        ('gamma2', float),
        ('planetnumber', float),
    ]
    record = (
        KeplerID,
        Teff_kep, Teff_fit, Teff_error,
        Logg_kep, Logg_fit, Logg_error,
        Metalicity_kep, Metalicity_fit, Metalicity_error,
        Rheader,
        gamma1, gamma2,
        planetnumber,
    )
    new_row = np.array([record], dtype=record_fields)
    if os.path.exists('Stellardataarray(fitted).npy'):
        #extend the previously saved array with the new row
        previous_rows = np.load('Stellardataarray(fitted).npy')
        combined = np.concatenate((previous_rows, new_row), axis=0)
    else:
        #first entry: the new row is the whole array
        combined = np.copy(new_row)
    np.save('Stellardataarray(fitted)', combined) #np.save adds the .npy extension
    return
def zandtfit(plandata, Keplerlc, Teff, Logg, Metalicity, sigma):
    """Fit the six free transit/stellar parameters to one lightcurve with mpfit.

    Parameters:
        plandata: record providing the starting values 'Impactpar',
            'Transitduration' and 'R_plan/R_star'.
        Keplerlc: lightcurve data, passed through to ``minimisefunction``.
        Teff, Logg, Metalicity: starting stellar values; the metallicity is
            fitted as log10(Metalicity).
        sigma: flux uncertainty, passed through to ``minimisefunction``.

    Returns:
        A 9-tuple: (abs(impact parameter), transit duration, radius ratio, Teff,
        Logg, 10**(fitted log metallicity), the raw best-fit parameter array,
        the parameter errors, the covariance array).
    """
    #initial guess (from kepler mast); the radius ratio starts at a third of its catalogue value
    p0 = [plandata['Impactpar'], plandata['Transitduration'], (plandata['R_plan/R_star'] / 3), Teff, Logg, np.log10(Metalicity)]
    fa = {'Keplerlc': Keplerlc, 'sigma': sigma} #additional variables to be passed to the function
    #impact parameter, Teff, Logg and log-metallicity are bounded; duration and radius ratio are unbounded
    parinfo = [
        {'value': p0[0], 'fixed': False, 'limited': [True, True], 'limits': [0, 2]},
        {'value': p0[1], 'fixed': False},
        {'value': p0[2], 'fixed': False},
        {'value': p0[3], 'fixed': False, 'limited': [True, True], 'limits': [0, 40000]},
        {'value': p0[4], 'fixed': False, 'limited': [True, True], 'limits': [0, 5.0]},
        {'value': p0[5], 'fixed': False, 'limited': [True, True], 'limits': [-5, 1]},
    ]
    m = mpfit(minimisefunction, p0, functkw=fa, quiet=0, maxiter=25, parinfo=parinfo, ftol=0.0001) #run mpfit for the best fit
    bestfitarray = m.params
    errors = m.perror
    covar = m.covar
    #identity test: "== None" on a numpy array compares element-wise and cannot be used as a condition
    if errors is None:
        #placeholder zeros so callers can still index the errors (only indices 0-5 are read)
        errors = [0, 0, 0, 0, 0, 0, 0]
    return np.abs(bestfitarray[0]), bestfitarray[1], bestfitarray[2], bestfitarray[3], bestfitarray[4], (10**bestfitarray[5]), bestfitarray, errors, covar
def minimisefunction(x, Keplerlc=None, fjac=None, sigma=None):
    """Residual function handed to mpfit: (observed flux - model flux) / sigma.

    Parameters:
        x: the six trial parameters, in the order impact parameter, transit
            duration, radius ratio, Teff, Logg, metallicity value passed to
            ``gammainterpolator``.
        Keplerlc: structured data with 'Foldtime' and 'Flux' fields.
        fjac: accepted as part of the signature; not used.
        sigma: flux uncertainty used to scale the residuals.

    Returns:
        ``[status, residuals]`` where status is always 0 and residuals is a 1-D
        array ordered by increasing 'Foldtime'.
    """
    impactparameter, Transitduration, R_plan, Teff, Logg, Metalicity = x[0], x[1], x[2], x[3], x[4], x[5]
    gamma1, gamma2 = gammainterpolator(Teff, Logg, Metalicity)
    #sort the kepler data by fold time so that data and model share one ordering
    Keplertransit = np.sort(Keplerlc, order='Foldtime')
    tvalues = Keplertransit['Foldtime']
    zvalues = zandtcalculator(tvalues, impactparameter, Transitduration, R_plan) #z value for each time
    f = transit.occultquad(zvalues, R_plan, [gamma1, gamma2]) #analytical transit lightcurve
    #one vectorised expression instead of growing the array point by point;
    #this also yields an empty array (not an unbound variable) for empty data
    returnval = (np.asarray(Keplertransit['Flux']) - np.asarray(f)) / sigma
    status = 0 #this will not error, so it exists purely as a formality
    return [status, returnval] #status followed by the residual array
def sigmacalc(Keplerlc):
    """Return the standard deviation of the flux of the points flagged 'onedayoftran'.

    The value is used elsewhere as the per-point error when computing chi^2.

    Parameters:
        Keplerlc: data indexable by 'Flux' and 'onedayoftran', the latter
            holding one flag per data point.

    Returns:
        ``np.std`` of the 'Flux' values whose 'onedayoftran' flag equals True.

    Raises:
        ValueError: if no data point is flagged.
    """
    #keep exactly the points whose flag compares equal to True
    flagged = np.equal(np.asarray(Keplerlc['onedayoftran']), True)
    selected_flux = np.asarray(Keplerlc['Flux'])[flagged]
    if selected_flux.size == 0:
        #state the cause explicitly instead of failing on an undefined variable
        raise ValueError("No data points are flagged 'onedayoftran'; cannot estimate sigma")
    return np.std(selected_flux)
def chicalc(Keplerlc, analyticallc, sigma, T_dur):
    """Compute chi^2 and a reduced chi^2 over the points with |Foldtime| <= T_dur.

    Parameters:
        Keplerlc: data indexable by 'Foldtime' and 'Flux'.
        analyticallc: 2-row array-like; row 1 holds the model flux aligned with
            the entries of ``Keplerlc``.
        sigma: flux uncertainty applied to every point.
        T_dur: half-width of the time window selecting the points to include.

    Returns:
        ``(chi2, chi2reduced)``; ``(nan, nan)`` when no point falls inside the
        window. Both values are also printed when at least one point is used.
    """
    foldtime = np.asarray(Keplerlc['Foldtime'])
    flux = np.asarray(Keplerlc['Flux'])
    model = np.asarray(analyticallc[1])
    selected = np.abs(foldtime) <= T_dur #points that fall within the window
    count = int(np.count_nonzero(selected))
    if count == 0:
        #np.nan is a float constant, not a function, so it must not be called
        return np.nan, np.nan
    chi2 = np.sum(((flux[selected] - model[selected]) ** 2) / sigma ** 2)
    print('The chi^2 value is {} (intran)'.format(chi2))
    #NOTE(review): the divisor is kept as count + 3; a conventional reduced
    #chi^2 divides by (points - fitted parameters) - confirm the intent
    chi2reduced = chi2 / (count + 3)
    print('The reduced chi^2 value is {} (intran)'.format(chi2reduced))
    return chi2, chi2reduced
def gammainterpolator(Teff, Logg, M_H):
    """Interpolate the two gamma coefficients from 'KeplerLDC.txt' at (Teff, Logg, M_H).

    On first use the table is read from the current directory (9 header rows
    skipped): columns 0-2 are the grid coordinates and columns 4 and 5 hold the
    tabulated coefficients. The table and the interpolators built from it are
    cached in module globals, so later calls neither re-read the file nor
    rebuild the triangulation.

    Parameters:
        Teff, Logg, M_H: coordinates of the query point, in the units of table
            columns 0, 1 and 2.

    Returns:
        A 2-element array ``[gamma1, gamma2]``: the linear interpolation, or the
        values of the nearest table point when the linear result is NaN.
    """
    from scipy.interpolate import LinearNDInterpolator, NearestNDInterpolator

    global points, gammalist, _ldc_linear, _ldc_nearest
    if 'points' not in globals() or '_ldc_linear' not in globals():
        points = np.loadtxt('KeplerLDC.txt', skiprows=9, usecols=(0, 1, 2)) #grid coordinates (Teff, Logg, M/H)
        #the coefficient column 5 is loaded twice; only the first two value columns are returned
        gammalist = np.loadtxt('KeplerLDC.txt', skiprows=9, usecols=(4, 5, 5))
        #these are the objects griddata builds internally for 'linear' and 'nearest';
        #building them once avoids redoing that work on every call
        _ldc_linear = LinearNDInterpolator(points, gammalist)
        _ldc_nearest = NearestNDInterpolator(points, gammalist)
    query = (Teff, Logg, M_H)
    point = _ldc_linear(query)
    if np.isnan(point[0]):
        #the linear interpolation failed for this point, so fall back to the nearest table entry
        point = _ldc_nearest(query)
    return np.array([point[0], point[1]])
def main():
    """Prompt for the data folder, make sure it holds KeplerLDC.txt, then run the fits.

    If the folder does not already contain 'KeplerLDC.txt', the copy in the
    current working directory is copied into it. The working directory is then
    changed to the folder and ``transitcalcloop`` is run there.
    """
    foldername = raw_input('Please enter the name of the folder in which contains the data: ')
    #the folder name must be substituted into the path; testing the literal
    #'{}/KeplerLDC.txt' never matches and forces a copy on every run
    if not os.path.exists('{}/KeplerLDC.txt'.format(foldername)):
        currentdirectory = os.getcwd() #get the current directory
        dst = '{}/{}/'.format(currentdirectory, foldername) #destination folder for the table
        src = '{}/KeplerLDC.txt'.format(currentdirectory) #the table next to this script's working directory
        shutil.copy(src, dst)
    os.chdir('{}'.format(foldername)) #changes the current directory to the specified directory
    transitcalcloop()
    return


#run the pipeline only when executed as a script, so importing the module has no side effects
if __name__ == '__main__':
    main()