def analyzeTrack(diaList):
    """Check a candidate track for consistency and fit its sky motion.

    The detections are unpacked with ``getTrackDias``.  The track is flagged
    as OK only when every detection carries the same ``ssmId`` as the first
    one.  Quadratics in ``t`` are fitted to ``ra`` and ``dec`` separately and
    a chi-square probability is computed for each fit, using the fixed
    per-measurement error ``nominalAstroErr``.

    Parameters
    ----------
    diaList : sequence
        Detections making up the track.  Must be non-empty, because the
        first ``ssmId`` is read unconditionally.

    Returns
    -------
    tuple
        ``(trackOK, ssmId[0], t, ra, dec, raChisqProb, decChisqProb)``.
    """
    nTrackPts = len(diaList)
    (t, ra, dec, ssmId) = getTrackDias(diaList)

    # A real track has a single source id across all of its detections.
    ssmId = np.array(ssmId)
    mismatched = np.where(ssmId != ssmId[0])
    trackOK = len(mismatched[0]) == 0

    # chi2.sf is the survival function that the removed scipy.stats.chisqprob
    # wrapped (assumes `st` is scipy.stats).
    # NOTE(review): the degrees of freedom passed below are the number of
    # points, not reduced by the three fitted coefficients -- confirm.
    raFit = np.polyfit(t, ra, 2)
    raChisq = np.sum((np.polyval(raFit, t) - ra) ** 2) / nominalAstroErr ** 2
    raChisqProb = st.chi2.sf(raChisq, nTrackPts)

    decFit = np.polyfit(t, dec, 2)
    decChisq = np.sum((np.polyval(decFit, t) - dec) ** 2) / nominalAstroErr ** 2
    decChisqProb = st.chi2.sf(decChisq, nTrackPts)

    return (trackOK, ssmId[0], t, ra, dec, raChisqProb, decChisqProb)
def lmSarma(ols, w, spDcache):
    """
    LM SARMA test, following eq. (15) of Anselin et al. (1996)
    [Anselin1996a]_.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression (not read by this function)
    w : W
        Spatial weights instance; supplies ``n``
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwyDs``, ``utwuDs``, ``j``
        and ``t``

    Returns
    -------
    sarma : tuple
        Pair of statistic and p-value for the LM sarma test.
    """
    lag_diff = spDcache.utwyDs - spDcache.utwuDs
    lag_term = lag_diff ** 2 / (w.n * spDcache.j - spDcache.t)
    err_term = spDcache.utwuDs ** 2 / spDcache.t
    statistic = lag_term + err_term
    # The joint test is evaluated against a chi-square with 2 dof.
    p_value = chisqprob(statistic, 2)
    return (statistic[0][0], p_value[0][0])
def lmErr(reg, w, spDcache):
    """
    LM error test, following eq. (9) of Anselin et al. (1996)
    [Anselin1996a]_.

    Parameters
    ----------
    reg : OLS_dev, TSLS_dev, STSLS_dev
        Instance from a regression class (not read by this function)
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwuDs`` and ``t``

    Returns
    -------
    lme : tuple
        Pair of statistic and p-value for the LM error test.
    """
    statistic = spDcache.utwuDs ** 2 / spDcache.t
    p_value = chisqprob(statistic, 1)
    # Both values are indexed as 1x1 2-D arrays and unwrapped to scalars.
    return (statistic[0][0], p_value[0][0])
def rlmErr(ols, w, spDcache):
    """
    Robust LM error test, following eq. (8) of Anselin et al. (1996)
    [Anselin1996a]_.

    NOTE: eq. (8) has an errata, the power -1 in the denominator should be
    inside the square bracket.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression; supplies ``n``
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``j``, ``t``, ``utwuDs`` and
        ``utwyDs``

    Returns
    -------
    rlme : tuple
        Pair of statistic and p-value for the Robust LM error test.
    """
    nj = ols.n * spDcache.j
    adjusted = spDcache.utwuDs - (spDcache.t * spDcache.utwyDs) / nj
    scale = spDcache.t * (1. - (spDcache.t / nj))
    statistic = adjusted ** 2 / scale
    p_value = chisqprob(statistic, 1)
    return (statistic[0][0], p_value[0][0])
def rlmLag(ols, w, spDcache):
    """
    Robust LM lag test, following eq. (12) of Anselin et al. (1996)
    [Anselin1996a]_.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression; supplies ``n``
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwyDs``, ``utwuDs``, ``j``
        and ``t``

    Returns
    -------
    rlml : tuple
        Pair of statistic and p-value for the Robust LM lag test.
    """
    difference = spDcache.utwyDs - spDcache.utwuDs
    denominator = (ols.n * spDcache.j) - spDcache.t
    statistic = difference ** 2 / denominator
    p_value = chisqprob(statistic, 1)
    return (statistic[0][0], p_value[0][0])
def lmErr(reg, w, spDcache):
    """
    LM error test, following eq. (9) of Anselin et al. (1996)
    [Anselin1996a]_.

    Parameters
    ----------
    reg : OLS_dev, TSLS_dev, STSLS_dev
        Instance from a regression class (not read by this function)
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwuDs`` and ``t``

    Returns
    -------
    lme : tuple
        Pair of statistic and p-value for the LM error test.
    """
    statistic = spDcache.utwuDs ** 2 / spDcache.t
    p_value = chisqprob(statistic, 1)
    # Both values are indexed as 1x1 2-D arrays and unwrapped to scalars.
    return (statistic[0][0], p_value[0][0])
def extractChiSquare(fluxarr, errarr, binwidth):
    """Return the chi-square probability of the fluxes about their mean.

    The inverse-variance weighted mean of the fluxes is computed, then the
    chi-square of the fluxes about that mean, and finally the chi-square
    survival probability with ``len(fluxes)`` degrees of freedom.

    Parameters
    ----------
    fluxarr : sequence of float
        Flux per bin.  Must be non-empty.
    errarr : sequence of float
        Error per bin, indexed in parallel with ``fluxarr``.
    binwidth : float
        Width of a bin; only used to decide whether to rebin.

    Returns
    -------
    float
        Chi-square probability (assumes ``st`` is ``scipy.stats``).
    """
    # Rebin through binflux() when min(fluxarr) * binwidth falls below 5.
    if min(fluxarr) * binwidth < 5:
        b_fluxarr, b_errarr = binflux(fluxarr, errarr)
    else:
        b_fluxarr, b_errarr = fluxarr, errarr

    # Inverse-variance weighted average flux.
    countsum = 0.0
    weightsum = 0.0
    for idx, flux in enumerate(b_fluxarr):
        variance = b_errarr[idx] ** 2
        countsum += flux / variance
        weightsum += 1.0 / variance
    avgrate = countsum / weightsum

    chisq_sum = 0.0
    for idx, flux in enumerate(b_fluxarr):
        chisq_sum += (flux - avgrate) ** 2 / b_errarr[idx] ** 2

    # chi2.sf is what the removed scipy.stats.chisqprob wrapped.
    # NOTE(review): dof is the bin count, not reduced by the fitted mean.
    return st.chi2.sf(chisq_sum, len(b_fluxarr))
def rlmLag(ols, w, spDcache):
    """
    Robust LM lag test, following eq. (12) of Anselin et al. (1996) [1]_.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression; supplies ``n``
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwyDs``, ``utwuDs``, ``j``
        and ``t``

    Returns
    -------
    rlml : tuple
        Pair of statistic and p-value for the Robust LM lag test.

    References
    ----------
    .. [1] Anselin, L., Bera, A. K., Florax, R., Yoon, M. J. (1996) "Simple
       diagnostic tests for spatial dependence". Regional Science and Urban
       Economics, 26, 77-104.
    """
    difference = spDcache.utwyDs - spDcache.utwuDs
    denominator = (ols.n * spDcache.j) - spDcache.t
    statistic = difference ** 2 / denominator
    p_value = chisqprob(statistic, 1)
    return (statistic[0][0], p_value[0][0])
def lmSarma(ols, w, spDcache):
    """
    LM SARMA test, following eq. (15) of Anselin et al. (1996) [1]_.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression (not read by this function)
    w : W
        Spatial weights instance; supplies ``n``
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwyDs``, ``utwuDs``, ``j``
        and ``t``

    Returns
    -------
    sarma : tuple
        Pair of statistic and p-value for the LM sarma test.

    References
    ----------
    .. [1] Anselin, L., Bera, A. K., Florax, R., Yoon, M. J. (1996) "Simple
       diagnostic tests for spatial dependence". Regional Science and Urban
       Economics, 26, 77-104.
    """
    lag_diff = spDcache.utwyDs - spDcache.utwuDs
    lag_term = lag_diff ** 2 / (w.n * spDcache.j - spDcache.t)
    err_term = spDcache.utwuDs ** 2 / spDcache.t
    statistic = lag_term + err_term
    # The joint test is evaluated against a chi-square with 2 dof.
    p_value = chisqprob(statistic, 2)
    return (statistic[0][0], p_value[0][0])
def rlmErr(ols, w, spDcache):
    """
    Robust LM error test, following eq. (8) of Anselin et al. (1996) [1]_.

    NOTE: eq. (8) has an errata, the power -1 in the denominator should be
    inside the square bracket.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression; supplies ``n``
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``j``, ``t``, ``utwuDs`` and
        ``utwyDs``

    Returns
    -------
    rlme : tuple
        Pair of statistic and p-value for the Robust LM error test.

    References
    ----------
    .. [1] Anselin, L., Bera, A. K., Florax, R., Yoon, M. J. (1996) "Simple
       diagnostic tests for spatial dependence". Regional Science and Urban
       Economics, 26, 77-104.
    """
    nj = ols.n * spDcache.j
    adjusted = spDcache.utwuDs - (spDcache.t * spDcache.utwyDs) / nj
    scale = spDcache.t * (1. - (spDcache.t / nj))
    statistic = adjusted ** 2 / scale
    p_value = chisqprob(statistic, 1)
    return (statistic[0][0], p_value[0][0])
def extractChiSquare(fluxarr, errarr, binwidth):
    """Return the chi-square probability of the fluxes about their mean.

    The inverse-variance weighted mean of the fluxes is computed, then the
    chi-square of the fluxes about that mean, and finally the chi-square
    survival probability with ``len(fluxes)`` degrees of freedom.

    Parameters
    ----------
    fluxarr : sequence of float
        Flux per bin.  Must be non-empty.
    errarr : sequence of float
        Error per bin, indexed in parallel with ``fluxarr``.
    binwidth : float
        Width of a bin; only used to decide whether to rebin.

    Returns
    -------
    float
        Chi-square probability (assumes ``st`` is ``scipy.stats``).
    """
    # Rebin through binflux() when min(fluxarr) * binwidth falls below 5.
    if min(fluxarr) * binwidth < 5:
        b_fluxarr, b_errarr = binflux(fluxarr, errarr)
    else:
        b_fluxarr, b_errarr = fluxarr, errarr

    # Inverse-variance weighted average flux.
    countsum = 0.0
    weightsum = 0.0
    for idx, flux in enumerate(b_fluxarr):
        variance = b_errarr[idx] ** 2
        countsum += flux / variance
        weightsum += 1.0 / variance
    avgrate = countsum / weightsum

    chisq_sum = 0.0
    for idx, flux in enumerate(b_fluxarr):
        chisq_sum += (flux - avgrate) ** 2 / b_errarr[idx] ** 2

    # chi2.sf is what the removed scipy.stats.chisqprob wrapped.
    # NOTE(review): dof is the bin count, not reduced by the fitted mean.
    return st.chi2.sf(chisq_sum, len(b_fluxarr))
def rlmLag(ols, w, spDcache):
    """
    Robust LM lag test, following eq. (12) of Anselin et al. (1996)
    [Anselin1996a]_.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression; supplies ``n``
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwyDs``, ``utwuDs``, ``j``
        and ``t``

    Returns
    -------
    rlml : tuple
        Pair of statistic and p-value for the Robust LM lag test.
    """
    difference = spDcache.utwyDs - spDcache.utwuDs
    denominator = (ols.n * spDcache.j) - spDcache.t
    statistic = difference ** 2 / denominator
    p_value = chisqprob(statistic, 1)
    return (statistic[0][0], p_value[0][0])
def rlmErr(ols, w, spDcache):
    """
    Robust LM error test, following eq. (8) of Anselin et al. (1996)
    [Anselin1996a]_.

    NOTE: eq. (8) has an errata, the power -1 in the denominator should be
    inside the square bracket.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression; supplies ``n``
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``j``, ``t``, ``utwuDs`` and
        ``utwyDs``

    Returns
    -------
    rlme : tuple
        Pair of statistic and p-value for the Robust LM error test.
    """
    nj = ols.n * spDcache.j
    adjusted = spDcache.utwuDs - (spDcache.t * spDcache.utwyDs) / nj
    scale = spDcache.t * (1. - (spDcache.t / nj))
    statistic = adjusted ** 2 / scale
    p_value = chisqprob(statistic, 1)
    return (statistic[0][0], p_value[0][0])
def lmSarma(ols, w, spDcache):
    """
    LM SARMA test, following eq. (15) of Anselin et al. (1996)
    [Anselin1996a]_.

    Parameters
    ----------
    ols : OLS_dev
        Instance from an OLS_dev regression (not read by this function)
    w : W
        Spatial weights instance; supplies ``n``
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwyDs``, ``utwuDs``, ``j``
        and ``t``

    Returns
    -------
    sarma : tuple
        Pair of statistic and p-value for the LM sarma test.
    """
    lag_diff = spDcache.utwyDs - spDcache.utwuDs
    lag_term = lag_diff ** 2 / (w.n * spDcache.j - spDcache.t)
    err_term = spDcache.utwuDs ** 2 / spDcache.t
    statistic = lag_term + err_term
    # The joint test is evaluated against a chi-square with 2 dof.
    p_value = chisqprob(statistic, 2)
    return (statistic[0][0], p_value[0][0])
def lmErr(reg, w, spDcache):
    """
    LM error test, following eq. (9) of Anselin et al. (1996) [1]_.

    Parameters
    ----------
    reg : OLS_dev, TSLS_dev, STSLS_dev
        Instance from a regression class (not read by this function)
    w : W
        Spatial weights instance (not read by this function)
    spDcache : spDcache
        Instance of spDcache class; supplies ``utwuDs`` and ``t``

    Returns
    -------
    lme : tuple
        Pair of statistic and p-value for the LM error test.

    References
    ----------
    .. [1] Anselin, L., Bera, A. K., Florax, R., Yoon, M. J. (1996) "Simple
       diagnostic tests for spatial dependence". Regional Science and Urban
       Economics, 26, 77-104.
    """
    statistic = spDcache.utwuDs ** 2 / spDcache.t
    p_value = chisqprob(statistic, 1)
    # Both values are indexed as 1x1 2-D arrays and unwrapped to scalars.
    return (statistic[0][0], p_value[0][0])
def uniform_dist_pvalue(dist):
    """Pearson chi-square p-value of ``dist`` against a discrete uniform.

    See http://en.wikipedia.org/wiki/Pearson's_chi-square_test#Discrete_uniform_distribution

    Parameters
    ----------
    dist : array-like with ``sum()`` and ``size``
        Observed counts per category (e.g. a numpy array).

    Returns
    -------
    The value returned by ``chisqprob`` for the statistic with
    ``dist.size - 1`` degrees of freedom.
    """
    # Under uniformity every category expects the same share of the total.
    expected = dist.sum() / dist.size
    cell_terms = (expected - dist) ** 2 / expected
    statistic = cell_terms.sum()
    degrees_of_freedom = dist.size - 1
    return chisqprob(statistic, degrees_of_freedom)
def makePrunedSubtrees(remainingAttributes, examples, attributeValues,
                       className, defaultLabel, setScoreFunc, gainFunc, q):
    """
    Creates a classification tree Node and all its children, pruning any
    split whose chi-square probability exceeds ``q``.

    Built recursively.  Base cases: no examples (leaf with ``defaultLabel``),
    all examples in one class (leaf with that class), no attributes left
    (leaf with the most common class).

    Args:
        remainingAttributes (list<string>): the names of attributes still not used
        examples (list<dictionary<str,str>>): list of examples
        attributeValues (dictionary<string,list<string>>): list of possible values for attribute
        className (str): the name of the class
        defaultLabel (string): the default label
        setScoreFunc (func): the function to score classes (ie classEntropy or gini);
            only forwarded to the recursive calls
        gainFunc (func): the function to score gain of attributes (ie entropyGain or giniGain)
        q (float): the Chi-Squared pruning parameter
    Returns:
        Node or LeafNode
        The classification tree node optimal for the remaining set of attributes.
    """
    # scipy.stats.stats.chisqprob no longer exists in current SciPy;
    # chi2.sf is the function it wrapped.
    from scipy.stats import chi2

    # Trivial cases
    if not examples:
        return LeafNode(defaultLabel)
    firstLabel = examples[0][className]
    if not any(e[className] != firstLabel for e in examples[1:]):
        return LeafNode(firstLabel)
    if not remainingAttributes:
        return LeafNode(getMostCommonClass(examples, className))

    # Non-trivial case: split on the attribute with the highest gain.
    attr = max(
        remainingAttributes,
        key=lambda x: gainFunc(examples, x, attributeValues[x], className))
    attrCount = getAttributeCounts(examples, attr, attributeValues[attr],
                                   className)
    classCount = getClassCounts(examples, className)
    totalCount = sum(classCount.values())

    chiSq = 0
    for value in attributeValues[attr]:
        observedByClass = attrCount[value]
        valueTotal = sum(observedByClass.values())
        for label in classCount:
            fExp = valueTotal * float(classCount[label]) / totalCount
            # A cell with zero expected frequency contributes nothing and
            # would otherwise divide by zero.
            if fExp:
                chiSq += (observedByClass.get(label, 0) - fExp) ** 2 / fExp

    # NOTE(review): operator precedence makes this (values - 1) * classes - 1,
    # not (values - 1) * (classes - 1) -- confirm which was intended.
    dof = (len(attrCount) - 1) * len(classCount) - 1
    majority = getMostCommonClass(examples, className)
    if chi2.sf(chiSq, dof) > q:
        return LeafNode(majority)

    root = Node(attr)
    unusedAttributes = [a for a in remainingAttributes if a != attr]
    for value in attributeValues[attr]:
        root.children[value] = makePrunedSubtrees(
            unusedAttributes, getPertinentExamples(examples, attr, value),
            attributeValues, className, majority, setScoreFunc, gainFunc, q)
    return root
def linreg(timesteps, fluxarr, errarr):
    """Weighted least-squares fit of a straight line to flux versus time.

    Parameters
    ----------
    timesteps : sequence of float
        Independent variable.
    fluxarr : sequence of float
        Measured values.
    errarr : sequence of float
        Per-point errors used to scale the residuals.

    Returns
    -------
    tuple
        ``(intercept, slope, chisq, chisq_pval)`` where ``chisq`` is the sum
        of squared error-scaled residuals at the fitted parameters.
    """
    # Turn into numpy arrays so the residual function is vectorised.
    timesteps_a = numpy.array(timesteps)
    fluxarr_a = numpy.array(fluxarr)
    errarr_a = numpy.array(errarr)

    fitfunc = lambda p, x: p[0] + p[1] * x
    errfunc = lambda p, x, y, e: (y - fitfunc(p, x)) / e

    p0 = numpy.array([0.001, 0.001])
    p_res, _ = leastsq(errfunc, p0, args=(timesteps_a, fluxarr_a, errarr_a))

    residuals = errfunc(p_res, timesteps_a, fluxarr_a, errarr_a)
    chisq = sum(abs(r) ** 2 for r in residuals)
    # chi2.sf is what the removed scipy.stats.chisqprob wrapped (assumes
    # `st` is scipy.stats).
    # NOTE(review): dof is the point count, not reduced by the 2 parameters.
    chisq_pval = st.chi2.sf(chisq, len(timesteps))
    return p_res[0], p_res[1], chisq, chisq_pval
def analyzeTrack(diaList):
    """Check a candidate track for consistency and fit its sky motion.

    The detections are unpacked with ``getTrackDias``.  The track is flagged
    as OK only when every detection carries the same ``ssmId`` as the first
    one.  Quadratics in ``t`` are fitted to ``ra`` and ``dec`` separately and
    a chi-square probability is computed for each fit, using the fixed
    per-measurement error ``nominalAstroErr``.

    Parameters
    ----------
    diaList : sequence
        Detections making up the track.  Must be non-empty, because the
        first ``ssmId`` is read unconditionally.

    Returns
    -------
    tuple
        ``(trackOK, ssmId[0], t, ra, dec, raChisqProb, decChisqProb)``.
    """
    nTrackPts = len(diaList)
    (t, ra, dec, ssmId) = getTrackDias(diaList)

    # A real track has a single source id across all of its detections.
    ssmId = np.array(ssmId)
    mismatched = np.where(ssmId != ssmId[0])
    trackOK = len(mismatched[0]) == 0

    # chi2.sf is the survival function that the removed scipy.stats.chisqprob
    # wrapped (assumes `st` is scipy.stats).
    # NOTE(review): the degrees of freedom passed below are the number of
    # points, not reduced by the three fitted coefficients -- confirm.
    raFit = np.polyfit(t, ra, 2)
    raChisq = np.sum((np.polyval(raFit, t) - ra) ** 2) / nominalAstroErr ** 2
    raChisqProb = st.chi2.sf(raChisq, nTrackPts)

    decFit = np.polyfit(t, dec, 2)
    decChisq = np.sum((np.polyval(decFit, t) - dec) ** 2) / nominalAstroErr ** 2
    decChisqProb = st.chi2.sf(decChisq, nTrackPts)

    return (trackOK, ssmId[0], t, ra, dec, raChisqProb, decChisqProb)
def mcnemar2(tup):
    """
    McNemar chi-square test on a 2x2 table of frequencies.

    Input args: tup -- sequence ``(a, b, c, d)`` of frequencies.  Only ``b``
    and ``c`` enter the statistic ``(b - c)**2 / (b + c)``.

    Output: pvalue of test (1 degree of freedom).  When ``b + c`` is zero
    the statistic is undefined; 1.0 is returned, since there is then no
    evidence of a difference.
    """
    b = tup[1]
    c = tup[2]
    total = b + c
    if total == 0:
        # Avoid dividing by zero when both cells are empty.
        return 1.0
    chi2testval = float(abs(b - c) ** 2) / total
    df = 1
    return chisqprob(chi2testval, df)
def dofitSCP0401(datfile='HST_SCP_0401.sncosmo.dat', z=1.713, t0=53080.0,
                 dt0=50.0):
    """Fit a salt2-extended model to a light-curve file and print a summary.

    Reads ``datfile`` with ``ascii.read``, fits ``z``, ``t0`` and ``x0`` with
    ``sncosmo.fit_lc``, then prints the peak B magnitude, distance modulus,
    offset from ``dm(z)``, reduced chi-square and p-value.

    Parameters
    ----------
    datfile : str
        Path of the observation table.
    z : float
        Redshift passed to ``dm`` for the Delta-mu comparison.
    t0 : float
        Centre of the allowed ``t0`` interval in the fit.
    dt0 : float
        Half-width of the allowed ``t0`` interval.

    Returns
    -------
    tuple
        ``(sn, fit, res)``: the data table, fitted model and fit result.
    """
    # TODO : read in the redshift, etc from the header.

    # read in the obs data
    sn = ascii.read(datfile, format='commented_header', header_start=-1,
                    data_start=0)

    # define the SALT2 model; x0_AB0 is the x0 giving peak B magnitude 0
    salt2ex = sncosmo.Model(source='salt2-extended')
    salt2ex.source.set_peakmag(0., 'bessellb', 'ab')
    x0_AB0 = salt2ex.get('x0')

    def x0_from_mB(m):
        return x0_AB0 * 10**(-0.4 * (m))

    # NOTE(review): the starting z/t0 and the z bounds below are literals,
    # independent of the z and t0 arguments -- confirm this is intended.
    salt2ex.set(z=1.713, t0=53090.0, x0=x0_from_mB(26.14), x1=0.2, c=-0.1)

    # Do a bounded fit
    fit_bounds = {'z': (1.712, 1.714), 't0': (t0 - dt0, t0 + dt0)}
    res, fit = sncosmo.fit_lc(sn, salt2ex, ['z', 't0', 'x0'],
                              bounds=fit_bounds)

    mB = -2.5 * np.log10(fit.get('x0') / x0_AB0)
    MBmodel = -19.19  # MBmodel from Rubin et al 2013
    distmod = mB - MBmodel
    deltamuLCDM = distmod - dm(z)
    print("mB = %.2f" % mB)
    print("dist.mod. = %.2f" % distmod)
    print("Delta.mu_LCDM = %.2f" % deltamuLCDM)

    pval = chisqprob(res.chisq, res.ndof)
    print("chi2/dof= %.3f" % (res.chisq / float(res.ndof)))
    print("p-value = %.3f" % pval)

    return (sn, fit, res)
def linreg(timesteps, fluxarr, errarr):
    """Weighted least-squares fit of a straight line to flux versus time.

    Parameters
    ----------
    timesteps : sequence of float
        Independent variable.
    fluxarr : sequence of float
        Measured values.
    errarr : sequence of float
        Per-point errors used to scale the residuals.

    Returns
    -------
    tuple
        ``(intercept, slope, chisq, chisq_pval)`` where ``chisq`` is the sum
        of squared error-scaled residuals at the fitted parameters.
    """
    # Turn into numpy arrays so the residual function is vectorised.
    timesteps_a = numpy.array(timesteps)
    fluxarr_a = numpy.array(fluxarr)
    errarr_a = numpy.array(errarr)

    fitfunc = lambda p, x: p[0] + p[1] * x
    errfunc = lambda p, x, y, e: (y - fitfunc(p, x)) / e

    p0 = numpy.array([0.001, 0.001])
    p_res, _ = leastsq(errfunc, p0, args=(timesteps_a, fluxarr_a, errarr_a))

    residuals = errfunc(p_res, timesteps_a, fluxarr_a, errarr_a)
    chisq = sum(abs(r) ** 2 for r in residuals)
    # chi2.sf is what the removed scipy.stats.chisqprob wrapped (assumes
    # `st` is scipy.stats).
    # NOTE(review): dof is the point count, not reduced by the 2 parameters.
    chisq_pval = st.chi2.sf(chisq, len(timesteps))
    return p_res[0], p_res[1], chisq, chisq_pval
def log_reg(timesteps, fluxarr, errarr):
    """Fit ``exp(a + b * t)`` to the fluxes by a linear fit in log space.

    The fit is done on ``log(flux)`` with errors ``log(1 + err / flux)``.
    The chi-square is then evaluated in linear space, using for each point
    the larger of the supplied error and the error predicted from the fitted
    count.  One CSV row per point is written to ``fit.csv`` in the current
    directory: time, flux, error used, supplied error, fitted flux.

    Parameters
    ----------
    timesteps : sequence of float
        Bin times; needs at least three entries because the bin width is
        taken as ``timesteps[2] - timesteps[1]``.
    fluxarr, errarr : numpy arrays
        Flux and error per bin (divided element-wise, so plain lists will
        not work).

    Returns
    -------
    tuple
        ``(a, b, r_chisq, chisq_pval)`` with ``r_chisq = chisq / len(timesteps)``.
    """
    log_fluxarr = []
    for x in fluxarr:
        if x < 0:
            # Parenthesised so the line is valid on Python 2 and 3.
            print("Flux less than zero?")
            log_fluxarr.append(math.log(10e-5))
            continue
        log_fluxarr.append(math.log(x))
    log_err = numpy.log(1 + (errarr / fluxarr))

    fitfunc = lambda p, x: p[0] + p[1] * x
    errfunc = lambda p, x, y, err: (y - fitfunc(p, x)) / err
    pinit = [fluxarr[0], -1]
    out = leastsq(errfunc, pinit, args=(timesteps, log_fluxarr, log_err))

    chisq = 0
    binwidth = timesteps[2] - timesteps[1]
    # The context manager guarantees fit.csv is flushed and closed.
    with open("fit.csv", 'w') as outf:
        for i in range(len(timesteps)):
            fit = float(math.e**(out[0][0] + out[0][1] * timesteps[i]))
            # With Gehrels' weighting, from the fitted count in the bin.
            pred_error = (1 + math.sqrt(fit * binwidth + 0.75)) / binwidth
            if pred_error > errarr[i]:
                error_used = pred_error
            else:
                error_used = errarr[i]
            chisq += ((fluxarr[i] - fit) / error_used)**2
            outf.write(
                str(timesteps[i]) + "," + str(fluxarr[i]) + "," +
                str(error_used) + "," + str(errarr[i]) + "," + str(fit) +
                "\n")

    r_chisq = chisq / len(timesteps)
    # chi2.sf is what the removed scipy.stats.chisqprob wrapped (assumes
    # `st` is scipy.stats).
    chisq_pval = st.chi2.sf(chisq, len(timesteps))
    return out[0][0], out[0][1], r_chisq, chisq_pval
def akTest(iv, w, spDcache):
    r"""
    Computes AK-test for the general case (end. reg. + sp. lag)

    Parameters
    ----------
    iv : STSLS_dev
        Instance from spatial 2SLS regression; supplies ``u``, ``z``,
        ``varb`` and ``sig2n``
    w : W
        Spatial weights instance; supplies ``sparse``, ``s0`` and ``n``
    spDcache : spDcache
        Instance of spDcache class; supplies ``t``

    Returns
    -------
    mi : float
        Moran's I statistic for IV residuals, as returned by ``get_mI``
    ak : float
        Square of corrected Moran's I for residuals:

        .. math::

            ak = \dfrac{N \times I^2}{\phi^2}
    p : float
        P-value of the test (chi-square, 1 degree of freedom)

    ToDo:
        * Code in as Nancy
        * Compare both
    """
    mi = get_mI(iv, w, spDcache)
    # Phi2
    etwz = np.dot(iv.u.T, (w.sparse * iv.z))
    a = np.dot(etwz, np.dot(iv.varb, etwz.T))
    s12 = (w.s0 / w.n) ** 2
    phi2 = (spDcache.t + (4.0 / iv.sig2n) * a) / (s12 * w.n)
    ak = w.n * mi ** 2 / phi2
    pval = chisqprob(ak, 1)
    # ak and pval are indexed as 1x1 2-D arrays and unwrapped to scalars.
    return (mi, ak[0][0], pval[0][0])
def akTest(iv, w, spDcache):
    r"""
    Computes AK-test for the general case (end. reg. + sp. lag)

    Parameters
    ----------
    iv : STSLS_dev
        Instance from spatial 2SLS regression; supplies ``u``, ``z``,
        ``varb`` and ``sig2n``
    w : W
        Spatial weights instance; supplies ``sparse``, ``s0`` and ``n``
    spDcache : spDcache
        Instance of spDcache class; supplies ``t``

    Returns
    -------
    mi : float
        Moran's I statistic for IV residuals, as returned by ``get_mI``
    ak : float
        Square of corrected Moran's I for residuals:

        .. math::

            ak = \dfrac{N \times I^2}{\phi^2}
    p : float
        P-value of the test (chi-square, 1 degree of freedom)

    ToDo:
        * Code in as Nancy
        * Compare both
    """
    mi = get_mI(iv, w, spDcache)
    # Phi2
    etwz = np.dot(iv.u.T, (w.sparse * iv.z))
    a = np.dot(etwz, np.dot(iv.varb, etwz.T))
    s12 = (w.s0 / w.n) ** 2
    phi2 = (spDcache.t + (4.0 / iv.sig2n) * a) / (s12 * w.n)
    ak = w.n * mi ** 2 / phi2
    pval = chisqprob(ak, 1)
    # ak and pval are indexed as 1x1 2-D arrays and unwrapped to scalars.
    return (mi, ak[0][0], pval[0][0])
def log_reg(timesteps, fluxarr, errarr):
    """Fit ``exp(a + b * t)`` to the fluxes by a linear fit in log space.

    The fit is done on ``log(flux)`` with errors ``log(1 + err / flux)``.
    The chi-square is then evaluated in linear space, using for each point
    the larger of the supplied error and the error predicted from the fitted
    count.  One CSV row per point is written to ``fit.csv`` in the current
    directory: time, flux, error used, supplied error, fitted flux.

    Parameters
    ----------
    timesteps : sequence of float
        Bin times; needs at least three entries because the bin width is
        taken as ``timesteps[2] - timesteps[1]``.
    fluxarr, errarr : numpy arrays
        Flux and error per bin (divided element-wise, so plain lists will
        not work).

    Returns
    -------
    tuple
        ``(a, b, r_chisq, chisq_pval)`` with ``r_chisq = chisq / len(timesteps)``.
    """
    log_fluxarr = []
    for x in fluxarr:
        if x < 0:
            # Parenthesised so the line is valid on Python 2 and 3.
            print("Flux less than zero?")
            log_fluxarr.append(math.log(10e-5))
            continue
        log_fluxarr.append(math.log(x))
    log_err = numpy.log(1 + (errarr / fluxarr))

    fitfunc = lambda p, x: p[0] + p[1] * x
    errfunc = lambda p, x, y, err: (y - fitfunc(p, x)) / err
    pinit = [fluxarr[0], -1]
    out = leastsq(errfunc, pinit, args=(timesteps, log_fluxarr, log_err))

    chisq = 0
    binwidth = timesteps[2] - timesteps[1]
    # The context manager guarantees fit.csv is flushed and closed.
    with open("fit.csv", 'w') as outf:
        for i in range(len(timesteps)):
            fit = float(math.e**(out[0][0] + out[0][1] * timesteps[i]))
            # With Gehrels' weighting, from the fitted count in the bin.
            pred_error = (1 + math.sqrt(fit * binwidth + 0.75)) / binwidth
            if pred_error > errarr[i]:
                error_used = pred_error
            else:
                error_used = errarr[i]
            chisq += ((fluxarr[i] - fit) / error_used)**2
            outf.write(
                str(timesteps[i]) + "," + str(fluxarr[i]) + "," +
                str(error_used) + "," + str(errarr[i]) + "," + str(fit) +
                "\n")

    r_chisq = chisq / len(timesteps)
    # chi2.sf is what the removed scipy.stats.chisqprob wrapped (assumes
    # `st` is scipy.stats).
    chisq_pval = st.chi2.sf(chisq, len(timesteps))
    return out[0][0], out[0][1], r_chisq, chisq_pval
def dofitSCP0401(datfile='HST_SCP_0401.sncosmo.dat', z=1.713, t0=53080.0,
                 dt0=50.0):
    """Fit a salt2-extended model to a light-curve file and print a summary.

    Reads ``datfile`` with ``ascii.read``, fits ``z``, ``t0`` and ``x0`` with
    ``sncosmo.fit_lc``, then prints the peak B magnitude, distance modulus,
    offset from ``dm(z)``, reduced chi-square and p-value.

    Parameters
    ----------
    datfile : str
        Path of the observation table.
    z : float
        Redshift passed to ``dm`` for the Delta-mu comparison.
    t0 : float
        Centre of the allowed ``t0`` interval in the fit.
    dt0 : float
        Half-width of the allowed ``t0`` interval.

    Returns
    -------
    tuple
        ``(sn, fit, res)``: the data table, fitted model and fit result.
    """
    # TODO : read in the redshift, etc from the header.

    # read in the obs data
    sn = ascii.read(datfile, format='commented_header', header_start=-1,
                    data_start=0)

    # define the SALT2 model; x0_AB0 is the x0 giving peak B magnitude 0
    salt2ex = sncosmo.Model(source='salt2-extended')
    salt2ex.source.set_peakmag(0., 'bessellb', 'ab')
    x0_AB0 = salt2ex.get('x0')

    def x0_from_mB(m):
        return x0_AB0 * 10**(-0.4 * (m))

    # NOTE(review): the starting z/t0 and the z bounds below are literals,
    # independent of the z and t0 arguments -- confirm this is intended.
    salt2ex.set(z=1.713, t0=53090.0, x0=x0_from_mB(26.14), x1=0.2, c=-0.1)

    # Do a bounded fit
    fit_bounds = {'z': (1.712, 1.714), 't0': (t0 - dt0, t0 + dt0)}
    res, fit = sncosmo.fit_lc(sn, salt2ex, ['z', 't0', 'x0'],
                              bounds=fit_bounds)

    mB = -2.5 * np.log10(fit.get('x0') / x0_AB0)
    MBmodel = -19.19  # MBmodel from Rubin et al 2013
    distmod = mB - MBmodel
    deltamuLCDM = distmod - dm(z)
    print("mB = %.2f" % mB)
    print("dist.mod. = %.2f" % distmod)
    print("Delta.mu_LCDM = %.2f" % deltamuLCDM)

    pval = chisqprob(res.chisq, res.ndof)
    print("chi2/dof= %.3f" % (res.chisq / float(res.ndof)))
    print("p-value = %.3f" % pval)

    return (sn, fit, res)
def correlationStatistics(predictions, observations, obsError, predError):
    """Correlation and chi-square statistics of predictions vs observations.

    Parameters
    ----------
    predictions, observations : sequence of float
        Paired values of equal length (at least two, since the degrees of
        freedom are ``len - 1``).
    obsError : float
        Assumed standard deviation of the observation errors.
    predError : float
        Error on the predictions; when non-zero it is combined with
        ``obsError`` in quadrature.

    Returns
    -------
    tuple
        ``(correlation, chisquared, reducedChisquared, chisquaredProb)``.
    """
    predictions = numpy.array(predictions)
    observations = numpy.array(observations)
    correlation = numpy.corrcoef(predictions, observations, rowvar=0)[1][0]

    if predError != 0:
        obsError = math.sqrt(obsError * obsError + predError * predError)

    # Chi-squared: assumes the observation errors are normally distributed
    # with deviation obsError, and asks how probable the observed scatter
    # about the predictions is under that assumption.
    squaredErrors = numpy.square(observations - predictions)
    nPoints = len(squaredErrors)
    degreesOfFreedom = nPoints - 1
    mse = numpy.mean(squaredErrors)
    chisquared = mse * nPoints / (math.pow(obsError, 2))
    # chi2.sf is what the removed scipy.stats.chisqprob wrapped (assumes
    # `stats` is scipy.stats).
    chisquaredProb = stats.chi2.sf(chisquared, degreesOfFreedom)
    reducedChisquared = chisquared / degreesOfFreedom
    return (correlation, chisquared, reducedChisquared, chisquaredProb)
def makePrunedSubtrees(remainingAttributes, examples, attributeValues,
                       className, defaultLabel, setScoreFunc, gainFunc, q):
    """
    Creates a classification tree Node and all its children, pruning any
    split whose chi-square probability exceeds ``q``.

    Built recursively.  Base cases: no examples (leaf with ``defaultLabel``),
    all examples in one class (leaf with that class), no attributes left
    (leaf with the most common class).

    Args:
        remainingAttributes (list<string>): the names of attributes still not used
        examples (list<dictionary<str,str>>): list of examples
        attributeValues (dictionary<string,list<string>>): list of possible values for attribute
        className (str): the name of the class
        defaultLabel (string): the default label
        setScoreFunc (func): the function to score classes (ie classEntropy or gini);
            only forwarded to the recursive calls
        gainFunc (func): the function to score gain of attributes (ie entropyGain or giniGain)
        q (float): the Chi-Squared pruning parameter
    Returns:
        Node or LeafNode
        The classification tree node optimal for the remaining set of attributes.
    """
    if len(examples) == 0:
        return LeafNode(defaultLabel)

    # all examples have the same classification
    firstLabel = examples[0][className]
    if all(example[className] == firstLabel for example in examples):
        return LeafNode(firstLabel)

    if len(remainingAttributes) == 0:
        return LeafNode(getMostCommonClass(examples, className))

    # Pick the attribute with the largest gain, scoring each one once.
    maxA = None
    maxGain = -99999999999
    for attr in remainingAttributes:
        gain = gainFunc(examples, attr, attributeValues[attr], className)
        if gain > maxGain:
            maxGain = gain
            maxA = attr

    # chi-square check: observed class counts per attribute value versus the
    # counts expected from the overall class proportions.
    observed = getAttributeCounts(examples, maxA, attributeValues[maxA],
                                  className)
    valueTotals = dict(
        (value, sum(counts.values())) for value, counts in observed.items())
    classCounts = getClassCounts(examples, className)
    nExamples = len(examples) * 1.0

    dev = 0
    for value, counts in observed.items():
        chii = 0
        for label, count in counts.items():
            pi = count * 1.0
            pih = (classCounts[label] / nExamples) * valueTotals[value]
            chii = chii + (pi - pih) * (pi - pih) / pih
        dev = dev + chii

    # NOTE(review): dof only counts attribute values, which presumably
    # assumes a two-class problem -- confirm.
    v = len(attributeValues[maxA]) - 1
    mostCommon = getMostCommonClass(examples, className)
    if chisqprob(dev, v) > q:
        return LeafNode(mostCommon)

    # add subtree
    newNode = Node(maxA)
    newRemain = [a for a in remainingAttributes if a != maxA]
    children = {}
    for value in attributeValues[maxA]:
        newExamples = getPertinentExamples(examples, maxA, value)
        children[value] = makePrunedSubtrees(
            newRemain, newExamples, attributeValues, className, mostCommon,
            setScoreFunc, gainFunc, q)
    newNode.children = children
    return newNode
def makePrunedSubtrees(remainingAttributes, examples, attributeValues,
                       className, defaultLabel, setScoreFunc, gainFunc, q):
    """
    Creates a classification tree Node and all its children, pruning any
    split whose chi-square probability exceeds ``q``.

    Built recursively.  Base cases: no examples (leaf with ``defaultLabel``),
    all examples in one class (leaf with that class), no attributes left
    (leaf with the most common class).

    Args:
        remainingAttributes (list<string>): the names of attributes still not used
        examples (list<dictionary<str,str>>): list of examples
        attributeValues (dictionary<string,list<string>>): list of possible values for attribute
        className (str): the name of the class
        defaultLabel (string): the default label
        setScoreFunc (func): the function to score classes (ie classEntropy or gini);
            only forwarded to the recursive calls
        gainFunc (func): the function to score gain of attributes (ie entropyGain or giniGain)
        q (float): the Chi-Squared pruning parameter
    Returns:
        Node or LeafNode
        The classification tree node optimal for the remaining set of attributes.
    """
    if len(examples) == 0:
        return LeafNode(defaultLabel)

    # all examples have the same classification
    firstLabel = examples[0][className]
    if all(example[className] == firstLabel for example in examples):
        return LeafNode(firstLabel)

    if len(remainingAttributes) == 0:
        return LeafNode(getMostCommonClass(examples, className))

    # Pick the attribute with the largest gain, scoring each one once.
    maxA = None
    maxGain = -99999999999
    for attr in remainingAttributes:
        gain = gainFunc(examples, attr, attributeValues[attr], className)
        if gain > maxGain:
            maxGain = gain
            maxA = attr

    # chi-square check: observed class counts per attribute value versus the
    # counts expected from the overall class proportions.
    observed = getAttributeCounts(examples, maxA, attributeValues[maxA],
                                  className)
    valueTotals = dict(
        (value, sum(counts.values())) for value, counts in observed.items())
    classCounts = getClassCounts(examples, className)
    nExamples = len(examples) * 1.0

    dev = 0
    for value, counts in observed.items():
        chii = 0
        for label, count in counts.items():
            pi = count * 1.0
            pih = (classCounts[label] / nExamples) * valueTotals[value]
            chii = chii + (pi - pih) * (pi - pih) / pih
        dev = dev + chii

    # NOTE(review): dof only counts attribute values, which presumably
    # assumes a two-class problem -- confirm.
    v = len(attributeValues[maxA]) - 1
    mostCommon = getMostCommonClass(examples, className)
    if chisqprob(dev, v) > q:
        return LeafNode(mostCommon)

    # add subtree
    newNode = Node(maxA)
    newRemain = [a for a in remainingAttributes if a != maxA]
    children = {}
    for value in attributeValues[maxA]:
        newExamples = getPertinentExamples(examples, maxA, value)
        children[value] = makePrunedSubtrees(
            newRemain, newExamples, attributeValues, className, mostCommon,
            setScoreFunc, gainFunc, q)
    newNode.children = children
    return newNode
def makePrunedSubtrees(remainingAttributes, examples, attributeValues,
                       className, defaultLabel, setScoreFunc, gainFunc, q):
    """
    Creates a classification tree Node and all its children, pruning any
    split whose chi-square probability exceeds ``q``.

    Built recursively.  Base cases: no examples (leaf with ``defaultLabel``),
    all examples in one class (leaf with that class), no attributes left
    (leaf with the most common class).

    Args:
        remainingAttributes (list<string>): the names of attributes still not used
        examples (list<dictionary<str,str>>): list of examples
        attributeValues (dictionary<string,list<string>>): list of possible values for attribute
        className (str): the name of the class
        defaultLabel (string): the default label
        setScoreFunc (func): the function to score classes (ie classEntropy or gini);
            only forwarded to the recursive calls
        gainFunc (func): the function to score gain of attributes (ie entropyGain or giniGain)
        q (float): the Chi-Squared pruning parameter
    Returns:
        Node or LeafNode
        The classification tree node optimal for the remaining set of attributes.
    """
    # base cases
    if len(examples) == 0:
        return LeafNode(defaultLabel)

    label = examples[0][className]
    labels = [example[className] for example in examples]
    if not any(other != label for other in labels):
        return LeafNode(label)

    if len(remainingAttributes) == 0:
        return LeafNode(getMostCommonClass(examples, className))

    # recursive step: the first attribute with the highest gain wins
    best_attr = max(
        remainingAttributes,
        key=lambda name: gainFunc(examples, name, attributeValues[name],
                                  className))

    # chi-square of observed class counts per attribute value against the
    # counts expected from the overall class proportions
    observed = getAttributeCounts(examples, best_attr,
                                  attributeValues[best_attr], className)
    value_totals = {}
    for value in observed:
        value_totals[value] = sum(observed[value].values())
    class_count = getClassCounts(examples, className)

    dev = 0
    for value in observed:
        row_chi = 0
        for cls in observed[value]:
            actual = observed[value][cls] * 1.0
            expected = (class_count[cls] / (len(examples) * 1.0)) * value_totals[value]
            delta = actual - expected
            row_chi += delta**2 / expected
        dev += row_chi

    if chisqprob(dev, len(attributeValues[best_attr]) - 1) > q:
        return LeafNode(getMostCommonClass(examples, className))

    # add subtree
    root = Node(best_attr)
    unused = list(remainingAttributes)
    unused.remove(best_attr)
    for attr_value in attributeValues[best_attr]:
        subset = getPertinentExamples(examples, best_attr, attr_value)
        root.children[attr_value] = makePrunedSubtrees(
            unused, subset, attributeValues, className,
            getMostCommonClass(examples, className), setScoreFunc, gainFunc,
            q)
    return root
def makePrunedSubtrees(remainingAttributes, examples, attributeValues, className,
                       defaultLabel, setScoreFunc, gainFunc, q):
    """
    Creates a classification tree Node and all its children, with
    chi-squared pre-pruning.

    This returns a Node, which is the root Node of the tree constructed from
    the passed in parameters. It recurses on the highest-gain attribute and
    replaces a split by a majority-class LeafNode whenever
    chisqprob(deviation, number_of_values - 1) is greater than q.

    Args:
        remainingAttributes (list<string>): the names of attributes still not used
        examples (list<dictionary<str,str>>): list of examples
        attributeValues (dictionary<string,list<string>>): list of possible
            values for each attribute
        className (str): the name of the class
        defaultLabel (string): the default label, used when examples is empty
        setScoreFunc (func): the function to score classes (ie classEntropy
            or gini); only passed through to the recursive calls
        gainFunc (func): the function to score gain of attributes (ie
            entropyGain or giniGain)
        q (float): the Chi-Squared pruning parameter
    Returns:
        Node or LeafNode
        The classification tree node optimal for the remaining set of attributes.
    """
    if not examples:
        return LeafNode(defaultLabel)

    # A pure node needs no further splitting.
    label = examples[0][className]
    if all(example[className] == label for example in examples):
        return LeafNode(label)

    # Used by every exit path below; computed once instead of per child.
    fallback = getMostCommonClass(examples, className)
    if not remainingAttributes:
        return LeafNode(fallback)

    # Pick the attribute with the strictly largest gain. -inf is the neutral
    # starting point (no dependency on the Python-2-only maxint), and each
    # gain is evaluated exactly once.
    argmax, gainmax = None, float('-inf')
    for attr in remainingAttributes:
        gain = gainFunc(examples, attr, attributeValues[attr], className)
        if gain > gainmax:
            argmax, gainmax = attr, gain

    # observed[value][label] -> number of examples with that value and label
    observed = getAttributeCounts(examples, argmax, attributeValues[argmax], className)
    classcnt = getClassCounts(examples, className)
    total = float(len(examples))

    dev = 0
    for per_class in observed.values():
        branch_size = sum(per_class.values())
        if not branch_size:
            # Empty branch: every expected count is zero, so it adds nothing
            # to the statistic (and must not be divided by).
            continue
        branch_dev = 0
        for item, count in per_class.items():
            expected = (classcnt[item] / total) * branch_size
            if expected == 0:
                continue
            diff = float(count) - expected
            branch_dev += diff * diff / expected
        dev += branch_dev

    # NOTE(review): dof is (#values - 1), i.e. the two-class case of
    # (#values - 1) * (#classes - 1) - confirm this is intended.
    v = len(attributeValues[argmax]) - 1
    if chisqprob(dev, v) > q:
        return LeafNode(fallback)

    root = Node(argmax)
    unused = list(remainingAttributes)
    unused.remove(argmax)
    for value in attributeValues[argmax]:
        branch_examples = getPertinentExamples(examples, argmax, value)
        root.children[value] = makePrunedSubtrees(
            unused, branch_examples, attributeValues, className,
            fallback, setScoreFunc, gainFunc, q)
    return root
def dofit(datfile='nebra_bestphot.dat', z=2.00, dz=0.02, t0=57575., dt0=20.0,
          x1=None, c=None, model='Ia', noUV=True, debug=False):
    """Fit a supernova light-curve model to the photometry in `datfile`.

    Three model families are supported, selected by `model`:

    * ``'Ia'``: a SALT2 model; prints the derived peak magnitude, distance
      modulus and fit statistics.
    * one of ``'cc', 'ib', 'ic', 'ii', 'ibc', 'iip', 'iin'`` (any case):
      every template listed in ``SubClassDict_SNANA`` for the selected
      sub-classes that is present in the sncosmo registry is fitted with
      rest-frame CCM89 host dust, and the fit with the lowest chi2/dof
      is kept.
    * anything else: `model` is used directly as an sncosmo source name
      (e.g. ``'nugent-sn91bg'``) and fitted with the same host-dust setup.

    For the two non-Ia families only the observations whose filter name
    starts with ``'f1'`` (case-insensitive) are used.

    Args:
        datfile: path of the photometry table, read with
            ``ascii.read(..., format='commented_header')``.
        z, dz: initial redshift; the fit is bounded to ``z +/- dz``.
        t0, dt0: initial t0; the fit is bounded to ``t0 +/- dt0``.
        x1: Ia only. If given, x1 is confined to ``x1 +/- 1e-6`` (effectively
            fixed); otherwise it is fitted within (-5, 5).
        c: Ia only. If given, c is set to this value and not fitted;
            otherwise it is fitted within (-0.5, 3.0).
        model: model family or sncosmo source name, see above.
        noUV: Ia only. True selects source ``'salt2'``, False selects
            ``'salt2-extended'``.
        debug: forwarded to ``sncosmo.fit_lc`` in the non-Ia branches.

    Returns:
        tuple ``(sn, fit, res)``: the photometry table that was actually
        fitted, plus the second and first values returned by
        ``sncosmo.fit_lc``.

    Raises:
        ValueError: a core-collapse family was requested but none of its
            templates is available in the sncosmo registry.
    """
    # TODO : read in the redshift, etc from the header.
    from .colorcolorfig import SubClassDict_SNANA

    host_av = 0.2  # initial host extinction A_V; R_V is set to 3.1 below

    def reduced_chi2(chi2, ndof):
        # Without positive degrees of freedom chi2/dof is meaningless:
        # report +inf so it neither divides by zero nor produces a negative
        # ratio that would win the best-template comparison.
        return chi2 / float(ndof) if ndof > 0 else float('inf')

    def keep_f1_bands(table):
        # remove the blue filters from the sn data
        keep = np.array([band.lower().startswith('f1')
                         for band in table['filter'].data])
        return table.copy()[keep]

    def fit_with_host_dust(source, dust, data):
        # Bounded fit of `source` reddened by rest-frame host-galaxy dust.
        dusty = sncosmo.Model(source=source, effects=[dust],
                              effect_names=['host'], effect_frames=['rest'])
        dusty.set(z=z, t0=t0, hostr_v=3.1, hostebv=host_av / 3.1)
        return sncosmo.fit_lc(
            data, dusty, ['z', 't0', 'amplitude', 'hostebv'], debug=debug,
            bounds={'z': (z - dz, z + dz),
                    't0': (t0 - dt0, t0 + dt0),
                    'hostebv': (0.0, 1.0)})

    # read in the obs data
    sn = ascii.read(datfile, format='commented_header',
                    header_start=-1, data_start=0)

    if model == 'Ia':
        # define SALT2 models and set initial guesses for z and t0
        salt2ex = sncosmo.Model(source='salt2' if noUV else 'salt2-extended')
        # x0 that corresponds to a B-band peak magnitude of 0 (AB); used as
        # the zero point when converting the fitted x0 to mB.
        salt2ex.source.set_peakmag(0., 'bessellb', 'ab')
        x0_AB0 = salt2ex.get('x0')
        salt2ex.set(z=z, t0=t0, x1=0.1, c=-0.2)

        # Do a bounded fit :
        varlist = ['z', 't0', 'x0', 'x1']
        bounds = {'z': (z - dz, z + dz), 't0': (t0 - dt0, t0 + dt0)}
        if x1 is not None:
            # x1 stays a fit parameter but is pinned by a very narrow bound.
            salt2ex.set(x1=x1)
            bounds['x1'] = (x1 - 1e-6, x1 + 1e-6)
        else:
            bounds['x1'] = (-5, 5)
        if c is not None:
            # c is held fixed by leaving it out of the fitted parameters.
            salt2ex.set(c=c)
        else:
            bounds['c'] = (-0.5, 3.0)
            varlist.append('c')

        res, fit = sncosmo.fit_lc(sn, salt2ex, varlist, bounds)

        zfit = fit.get('z')
        mB = -2.5 * np.log10(fit.get('x0') / x0_AB0)
        distmod = mB - MBmodel
        # NOTE(review): dm() presumably returns the LCDM distance modulus at
        # the fitted redshift - confirm against its definition.
        deltamuLCDM = distmod - dm(zfit)
        print("mB = %.2f" % mB)
        print("dist.mod. = %.2f" % distmod)
        print("Delta.mu_LCDM = %.2f" % deltamuLCDM)

        chi2 = res.chisq
        ndof = res.ndof
        pval = chisqprob(chi2, ndof)
        if ndof > 0:
            print("chi2/dof= %.3f" % (chi2 / float(ndof)))
        else:
            print("chi2/dof= %.3f/%i" % (chi2, ndof))
        print("p-value = %.3f" % pval)

        print("z = %.3f" % fit.get('z'))
        print("t0 = %.3f" % fit.get('t0'))
        print("x0 = %.3e" % fit.get('x0'))
        print("x1 = %.3f" % fit.get('x1'))
        print("c = %.3f" % fit.get('c'))

    elif model.lower() in ['cc', 'ib', 'ic', 'ii', 'ibc', 'iip', 'iin']:
        sn = keep_f1_bands(sn)

        # define a host-galaxy dust model
        dust = sncosmo.CCM89Dust()
        version = '1.0'

        family = model.lower()
        if family == 'cc':
            classlist = ['Ib', 'Ic', 'IIP', 'IIn']
        elif family == 'ii':
            classlist = ['IIP', 'IIn']
        elif family == 'ibc':
            classlist = ['Ibc']
        else:
            classlist = [model]

        # find the best-fit from each CC sub-class
        chi2list, reslist, fitlist = [], [], []
        for snclass in classlist:
            for modname in SubClassDict_SNANA[snclass.lower()]:
                # NOTE(review): this peeks at sncosmo's private registry
                # table to skip templates that are not installed.
                modkey = (sncosmo.Source, modname, version)
                if modkey not in sncosmo.registry._loaders:
                    continue

                res, fit = fit_with_host_dust(modname, dust, sn)

                chi2 = res.chisq
                ndof = res.ndof
                pval = chisqprob(chi2, ndof)
                chi2dof = reduced_chi2(chi2, ndof)
                print("%s chi2/dof= %.3f p=%.3f" % (modname, chi2dof, pval))

                chi2list.append(chi2dof)
                reslist.append(res)
                fitlist.append(fit)

        if not chi2list:
            # np.argmin would also raise ValueError here, but with a message
            # that says nothing about the actual cause.
            raise ValueError(
                "no template for model %r is available in the sncosmo "
                "registry" % (model,))
        ichi2min = np.argmin(chi2list)
        res, fit = reslist[ichi2min], fitlist[ichi2min]

    else:  # 'nugent-sn91bg'
        # Same band selection as the core-collapse branch (case-insensitive).
        sn = keep_f1_bands(sn)

        # define a host-galaxy dust model
        dust = sncosmo.CCM89Dust()
        res, fit = fit_with_host_dust(model, dust, sn)

        chi2 = res.chisq
        ndof = res.ndof
        pval = chisqprob(chi2, ndof)
        print("%s chi2/dof= %.3f p=%.3f"
              % (model, reduced_chi2(chi2, ndof), pval))

    return (sn, fit, res)
def dofit(datfile='HST_CANDELS4_bush.sncosmo.dat', z=1.76, dz=0.53,
          t0=55803.1, dt0=25.0, x1=None, c=None, model='Ia',
          noUV=True, debug=False):
    """Fit a supernova light-curve model to the photometry in `datfile`.

    `model` selects what is fitted:

    * ``'Ia'``: a SALT2 model; the derived peak magnitude, distance modulus
      and fit statistics are printed.
    * one of ``'cc', 'ib', 'ic', 'ii', 'ibc', 'iip', 'iin'`` (any case):
      templates named ``<subclass>.01`` ... ``<subclass>.09`` that exist in
      the sncosmo registry are each fitted with rest-frame CCM89 host dust;
      the fit with the lowest chi2/dof is kept.
    * anything else: `model` is taken as an sncosmo source name (e.g.
      ``'nugent-sn91bg'``) and fitted with the same host-dust setup.

    For the two non-Ia cases only observations whose filter name starts
    with ``'f1'`` are used.

    Args:
        datfile: path of the photometry table, read with
            ``ascii.read(..., format='commented_header')``.
        z, dz: initial redshift; the fit is bounded to ``z +/- dz``.
        t0, dt0: initial t0; the fit is bounded to ``t0 +/- dt0``.
        x1: Ia only. If given, x1 is confined to ``x1 +/- 1e-6`` (effectively
            fixed); otherwise it is fitted within (-5, 5).
        c: Ia only. If given, c is set to this value and not fitted;
            otherwise it is fitted within (-0.5, 3.0).
        model: model family or sncosmo source name, see above.
        noUV: Ia only. True selects source ``'salt2'``, False selects
            ``'salt2-extended'``.
        debug: forwarded to ``sncosmo.fit_lc`` in the non-Ia branches.

    Returns:
        tuple ``(sn, fit, res)``: the photometry table that was actually
        fitted, plus the second and first values returned by
        ``sncosmo.fit_lc``.

    Raises:
        ValueError: a core-collapse family was requested but none of its
            numbered templates is available in the sncosmo registry.
    """
    # TODO : read in the redshift, etc from the header.

    host_av = 0.2  # initial host extinction A_V; R_V is set to 3.1 below

    def reduced_chi2(chi2, ndof):
        # chi2/dof is undefined without positive degrees of freedom; +inf
        # avoids a division by zero and keeps such a fit from being chosen
        # as "best" through a negative ratio.
        return chi2 / float(ndof) if ndof > 0 else float('inf')

    def keep_f1_bands(table):
        # remove the blue filters from the sn data
        # NOTE(review): the prefix test is case-sensitive, so upper-case
        # filter names would all be dropped - confirm the data files use
        # lower-case names.
        keep = np.array([band.startswith('f1')
                         for band in table['filter'].data])
        return table.copy()[keep]

    def fit_with_host_dust(source, dust, data):
        # Bounded fit of `source` reddened by rest-frame host-galaxy dust.
        dusty = sncosmo.Model(source=source, effects=[dust],
                              effect_names=['host'], effect_frames=['rest'])
        dusty.set(z=z, t0=t0, hostr_v=3.1, hostebv=host_av / 3.1)
        return sncosmo.fit_lc(
            data, dusty, ['z', 't0', 'amplitude', 'hostebv'], debug=debug,
            bounds={'z': (z - dz, z + dz),
                    't0': (t0 - dt0, t0 + dt0),
                    'hostebv': (0.0, 1.0)})

    # read in the obs data
    sn = ascii.read(datfile, format='commented_header',
                    header_start=-1, data_start=0)

    if model == 'Ia':
        # define SALT2 models and set initial guesses for z and t0
        salt2ex = sncosmo.Model(source='salt2' if noUV else 'salt2-extended')
        # x0 that corresponds to a B-band peak magnitude of 0 (AB); used as
        # the zero point when converting the fitted x0 to mB.
        salt2ex.source.set_peakmag(0., 'bessellb', 'ab')
        x0_AB0 = salt2ex.get('x0')
        salt2ex.set(z=z, t0=t0, x1=0.1, c=-0.2)

        # Do a bounded fit :
        varlist = ['z', 't0', 'x0', 'x1']
        bounds = {'z': (z - dz, z + dz), 't0': (t0 - dt0, t0 + dt0)}
        if x1 is not None:
            # x1 stays a fit parameter but is pinned by a very narrow bound.
            salt2ex.set(x1=x1)
            bounds['x1'] = (x1 - 1e-6, x1 + 1e-6)
        else:
            bounds['x1'] = (-5, 5)
        if c is not None:
            # c is held fixed by leaving it out of the fitted parameters.
            salt2ex.set(c=c)
        else:
            bounds['c'] = (-0.5, 3.0)
            varlist.append('c')

        res, fit = sncosmo.fit_lc(sn, salt2ex, varlist, bounds)

        zfit = fit.get('z')
        mB = -2.5 * np.log10(fit.get('x0') / x0_AB0)
        distmod = mB - MBmodel
        # NOTE(review): dm() presumably returns the LCDM distance modulus at
        # the fitted redshift - confirm against its definition.
        deltamuLCDM = distmod - dm(zfit)
        print("mB = %.2f" % mB)
        print("dist.mod. = %.2f" % distmod)
        print("Delta.mu_LCDM = %.2f" % deltamuLCDM)

        chi2 = res.chisq
        ndof = res.ndof
        pval = chisqprob(chi2, ndof)
        if ndof > 0:
            print("chi2/dof= %.3f" % (chi2 / float(ndof)))
        else:
            print("chi2/dof= %.3f/%i" % (chi2, ndof))
        print("p-value = %.3f" % pval)

        print("z = %.3f" % fit.get('z'))
        print("t0 = %.3f" % fit.get('t0'))
        print("x0 = %.3e" % fit.get('x0'))
        print("x1 = %.3f" % fit.get('x1'))
        print("c = %.3f" % fit.get('c'))

    elif model.lower() in ['cc', 'ib', 'ic', 'ii', 'ibc', 'iip', 'iin']:
        sn = keep_f1_bands(sn)

        # define a host-galaxy dust model
        dust = sncosmo.CCM89Dust()
        version = '1.0'

        family = model.lower()
        if family == 'cc':
            classlist = ['Ib', 'Ic', 'IIP', 'IIn']
        elif family == 'ii':
            classlist = ['IIP', 'IIn']
        elif family == 'ibc':
            classlist = ['Ib', 'Ic']
        else:
            classlist = [model]

        # find the best-fit from each CC sub-class
        chi2list, reslist, fitlist = [], [], []
        for snclass in classlist:
            for tempnum in range(1, 10):
                modname = snclass.lower() + '.%02i' % tempnum
                # NOTE(review): this peeks at sncosmo's private registry
                # table to skip template numbers that do not exist.
                modkey = (sncosmo.Source, modname, version)
                if modkey not in sncosmo.registry._loaders:
                    continue

                res, fit = fit_with_host_dust(modname, dust, sn)

                chi2 = res.chisq
                ndof = res.ndof
                pval = chisqprob(chi2, ndof)
                chi2dof = reduced_chi2(chi2, ndof)
                print("%s chi2/dof= %.3f p=%.3f" % (modname, chi2dof, pval))

                chi2list.append(chi2dof)
                reslist.append(res)
                fitlist.append(fit)

        if not chi2list:
            # np.argmin would also raise ValueError here, but with a message
            # that says nothing about the actual cause.
            raise ValueError(
                "no template for model %r is available in the sncosmo "
                "registry" % (model,))
        ichi2min = np.argmin(chi2list)
        res, fit = reslist[ichi2min], fitlist[ichi2min]

    else:  # 'nugent-sn91bg'
        sn = keep_f1_bands(sn)

        # define a host-galaxy dust model
        dust = sncosmo.CCM89Dust()
        res, fit = fit_with_host_dust(model, dust, sn)

        chi2 = res.chisq
        ndof = res.ndof
        pval = chisqprob(chi2, ndof)
        print("%s chi2/dof= %.3f p=%.3f"
              % (model, reduced_chi2(chi2, ndof), pval))

    return (sn, fit, res)