def splinefit(x, y, degree):
    """Fit a smoothing spline to ``(x, y)`` and report how well it fits.

    Args:
        x: Sample positions, passed straight to ``UnivariateSpline``.
        y: Observed values, one per entry of ``x``.
        degree: Despite its name this value is forwarded to
            ``UnivariateSpline`` as the smoothing factor ``s``; the spline
            order ``k`` is left at the library default.

    Returns:
        dict with three entries:
            'spline'        -- spline coefficients (``get_coeffs()``),
            'residual'      -- residual sum of squares reported by the fit
                               (``get_residual()``),
            'determination' -- ``(SStot - RSS) / SStot``.

    Raises:
        ZeroDivisionError: if ``y`` holds plain Python numbers that are all
            equal (total sum of squares is zero).
    """
    fit = UnivariateSpline(x, y, s=degree)
    rss = fit.get_residual()

    # Force float arithmetic: with integer observations ``sum(y) / len(y)``
    # truncates under Python 2 and skews the total sum of squares.
    ybar = float(sum(y)) / len(y)
    sstot = sum((yi - ybar) ** 2 for yi in y)

    return {
        'spline': fit.get_coeffs(),
        'residual': rss,
        'determination': (sstot - rss) / sstot,
    }
def spline(self, k=3, s=0.5):
    """Resample every patient's uneven observations with smoothing splines.

    For each patient in ``self.filtered_data`` the observation dates are
    converted to day offsets from that patient's first date (stored under
    the ``'days'`` key), then one ``UnivariateSpline`` per questionnaire
    item is fitted to the non-``None`` responses and evaluated on a common
    grid of ``self.keep_days`` points.

    Args:
        k: Spline order passed to ``UnivariateSpline``.
        s: Smoothing factor passed to ``UnivariateSpline``.

    Returns:
        dict mapping patient -> array of shape ``(keep_days, num_items)``.
        The same dict is stored on ``self.splined_data`` and ``self.data``.

    Side effects:
        Sets ``self.spline_err`` (mean per-item RMS misfit at the observed
        days), ``self.data_splined``, and ``self.residual`` / ``self.knots``
        (these two keep only the values of the last spline fitted).

    Raises:
        ValueError: if ``self.filtered_data`` is empty or an item has no
            responses at all for a patient.
    """
    if not self.filtered_data:
        raise ValueError("filtered_data is empty; nothing to spline")

    # Re-express each observation date as whole days since the first one.
    for record in self.filtered_data.values():
        dates = record['dates']
        record['days'] = [(date - dates[0]).days for date in dates]

    # NOTE(review): this grid has keep_days points spanning [0, keep_days]
    # inclusive, so its spacing is slightly larger than one day. Kept as-is
    # to preserve the output; confirm whether np.arange(keep_days) was meant.
    full_space = np.linspace(0, self.keep_days, num=self.keep_days)

    total_rms = 0.0
    splined_data = dict()
    for patient, record in self.filtered_data.items():
        day_indices = np.array(record['days'])
        per_item = []
        for question in range(self.num_items):
            item_data = np.array([row[question] for row in record['data']])
            if len(item_data) == 0:
                message = "Uh, no responses for question %d patient %s" % (
                    question + 1, patient)
                print(message)
                raise ValueError(message)

            # Drop missing responses. Too few remaining points is left to
            # UnivariateSpline to reject.
            observed = np.array([resp is not None for resp in item_data],
                                dtype=bool)
            obs_days = day_indices[observed]
            # An array containing None has object dtype; cast for the fit.
            obs_vals = item_data[observed].astype(float)

            spl = UnivariateSpline(obs_days, obs_vals, k=k, s=s)
            self.residual = spl.get_residual()
            self.knots = spl.get_knots()
            per_item.append(spl(full_space))

            # RMS misfit measured at the observed days themselves. Indexing
            # the resampled curve by day number is wrong because the grid is
            # not aligned with whole days, and it overflows for
            # day >= keep_days.
            total_rms += math.sqrt(np.mean((spl(obs_days) - obs_vals) ** 2))

        # One row per grid point, one column per item.
        splined_data[patient] = np.array(per_item).T

    self.spline_err = total_rms / (self.num_items * len(splined_data))
    self.splined_data = splined_data
    self.data_splined = True
    self.data = splined_data
    return splined_data
def calc_chi(ym, omega, c0):
    """Return the residual of a cubic spline through the rescaled data.

    Every point ``j`` of every data set ``i`` is rescaled and written into
    the module-level work arrays ``xL``, ``yL`` and ``wL`` (which are zeroed
    first), then a weighted cubic ``UnivariateSpline`` is fitted to them.

    Reads the module-level ``all_L``, ``mf``, ``MH`` and ``err``.

    Args:
        ym: Exponent parameter; abscissae are ``L * mf**(1/ym)``.
        omega: Exponent of the correction term.
        c0: Amplitude of the correction term ``1 + c0 * mf**omega``.

    Returns:
        The spline residual (``get_residual()``).

    Side effects:
        Overwrites ``xL``, ``yL``, ``wL`` in place. Prints an error and
        calls ``sys.exit(1)`` if the residual is negative or NaN.
    """
    # Clear any previous data (in place, the arrays are shared).
    xL[:] = 0.
    yL[:] = 0.
    wL[:] = 0.

    count = 0
    # enumerate instead of all_L.index(): linear, and it stays correct when
    # the same L label appears more than once.
    for i, i_str in enumerate(all_L):
        L = float(i_str)
        for j, m in enumerate(mf[i]):
            # 1.0 / ym: avoid Python 2 integer division for integer ym.
            xL[count] = L * np.power(m, 1.0 / ym)
            scale = 1. + c0 * np.power(m, omega)
            yL[count] = L * MH[i][j] / scale
            wL[count] = scale / (L * err[i][j])  # Not squared, as for polyfit
            count += 1

    # Compute cubic spline
    curve = UnivariateSpline(xL, yL, w=wL, k=3)
    chiSq = curve.get_residual()
    # Written as "not >=" so that NaN is rejected as well as negatives.
    if not chiSq >= 0:
        print("ERROR: spline failed, chiSq = %s" % chiSq)
        sys.exit(1)
    return chiSq
class DualSplineSmoother(object):
    """Pair of smoothing splines built from one flat parameter vector.

    The vector ``yp`` is cut in two halves; the first half defines the
    spline ``self.m`` and the second half the spline ``self.s``. Within each
    half the leading ``xPos`` entries are mapped through
    ``tanh(2 / scale * value)``, sorted and padded with 0 and 1 to form the
    abscissae, and the remaining entries are the ordinates.

    The plotting methods use ``self.s`` as a variance (its square root is
    drawn as a band around ``self.m``) and write PDF files into ``workdir``.
    """

    def __init__(self, yp, workdir, scale, sm=200):
        """Split ``yp`` and fit both splines.

        Args:
            yp: Flat sequence of parameters, laid out as described on the
                class.
            workdir: Directory that receives the generated figures.
            scale: Divisor of the ``tanh`` argument (``2 / scale``).
            sm: Unused; kept for interface compatibility.
        """
        yp = np.array(yp)
        # Floor division spelled explicitly: these are used as slice bounds.
        self.l = len(yp) // 2
        self.xPos = (self.l - 2) // 2
        # 2.0: with an integer scale, Python 2 would truncate 2/scale to 0.
        tnsc = 2.0 / scale
        print(tnsc)

        # NOTE: changes matplotlib's global defaults.
        plt.rcParams['font.size'] = 24
        plt.rcParams['lines.linewidth'] = 2.4
        self.workdir = workdir

        self.avx, self.av = self._split_profile(yp[:self.l], tnsc)
        self.sigmax, self.sigma = self._split_profile(yp[self.l:], tnsc)

        self.m = UnivariateSpline(self.avx, self.av)
        print("Created spline with " + str(len(self.m.get_knots())) + " knots")
        self.s = UnivariateSpline(self.sigmax, self.sigma)
        print("Created spline with " + str(len(self.s.get_knots())) + " knots")

    def _split_profile(self, points, tnsc):
        """Return ``(abscissae, ordinates)`` for one half of the vector."""
        inner = np.sort(np.tanh(tnsc * points[:self.xPos]))
        abscissae = np.append(np.append([0], inner), [1])
        return abscissae, points[self.xPos:]

    @staticmethod
    def _write_curve(filename, curve):
        """Write 1000 ``"x , y"`` samples of ``curve`` on [0, 1] to a file.

        Lines are newline-separated with no trailing newline.
        """
        tp = np.linspace(0, 1, 1000)
        rows = [str(x) + " , " + str(curve(x)) for x in tp]
        with open(filename, "w+") as f:
            f.write("\n".join(rows))

    def saveSpline(self, filename):
        """Sample ``self.m`` on [0, 1] and write it to ``filename``."""
        self._write_curve(filename, self.m)

    def saveSigmaSpline(self, filename):
        """Sample ``self.s`` on [0, 1] and write it to ``filename``."""
        self._write_curve(filename, self.s)

    def showSpline(self, order=0):
        """Print details of ``self.m`` and save plots of both splines.

        Writes ``splineFit.pdf``; if ``order > 0`` also writes
        ``splineDerivative.pdf`` with the first derivatives.
        """
        plt.clf()
        print("Spline full information:")
        print(self.m.get_knots())
        print(self.m.get_coeffs())
        print(self.m.get_residual())
        tp = np.linspace(0, 1, 1000)

        plt.subplot(211)
        plt.scatter(self.avx, self.av)
        plt.plot(tp, self.m(tp))
        plt.subplot(212)
        plt.scatter(self.sigmax, self.sigma)
        plt.plot(tp, self.s(tp))
        plt.savefig(self.workdir + "/splineFit.pdf")

        if order > 0:
            plt.clf()
            plt.subplot(211)
            plt.plot(tp, self.m(tp, 1))
            plt.subplot(212)
            plt.plot(tp, self.s(tp, 1))
            plt.savefig(self.workdir + "/splineDerivative.pdf")

    def plotSplineData(self, dataContainer, yscale):
        """Plot raw points with ``m`` and the ``m +/- sqrt(s)`` band.

        Reads ``dataContainer.points`` and ``dataContainer.background``;
        writes ``splineVsData.pdf``.
        """
        plt.clf()
        plt.xlim(0, 1)
        plt.ylim(0, yscale)
        tp = np.linspace(0, 1, 100)
        background = dataContainer.background
        mean = self.m(tp)
        spread = np.sqrt(self.s(tp))

        plt.scatter(dataContainer.points[0],
                    dataContainer.points[1] + background,
                    c='b', marker='o', s=5)
        plt.plot(tp, mean + background, 'r', linewidth=2)
        plt.plot(tp, mean + spread + background, 'r--', linewidth=2)
        plt.plot(tp, mean - spread + background, 'r--', linewidth=2)
        plt.plot(tp, np.zeros(100) + background, '--', c='#BBBBBB', alpha=0.8)
        plt.savefig(self.workdir + "/splineVsData.pdf")

    def plotBinnedData(self, dataContainer):
        """Compare both splines with the binned statistics.

        Reads ``dataContainer.numBins``, ``.avs`` and ``.stds``; writes
        ``noiseVsBins.pdf``, ``splineVsBins.pdf`` and
        ``spatialNoiseVsBins.pdf``.
        """
        tp = np.linspace(0, 1, dataContainer.numBins)
        tpHD = np.linspace(0, 1, 500)
        binned_var = np.power(dataContainer.stds, 2)

        plt.clf()
        plt.plot(self.m(tpHD), self.s(tpHD))
        plt.plot(dataContainer.avs, binned_var, 'o')
        plt.savefig(self.workdir + "/noiseVsBins.pdf")

        plt.clf()
        plt.plot(tpHD, self.m(tpHD), 'r', linewidth=2)
        plt.plot(tp, dataContainer.avs, 'o')
        plt.savefig(self.workdir + "/splineVsBins.pdf")

        plt.clf()
        plt.plot(tpHD, self.s(tpHD), 'r', linewidth=2)
        plt.plot(tp, binned_var, 'o')
        plt.savefig(self.workdir + "/spatialNoiseVsBins.pdf")

    def plotFisherInfo(self, dataContainer, ymax, ymaxsq):
        """Plot the positional-variance estimates derived from both splines.

        Both splines are normalised by the maximum of ``m`` on [0, 1]. A
        spline of normalised ``s`` against normalised ``m`` is evaluated at
        0 and stored in ``self.se``, which is subtracted from the variance.
        Writes ``variance.pdf``, ``stddev.pdf`` and ``fisherInfo.pdf``.
        ``dataContainer`` is not used.
        """
        plt.clf()
        t = np.linspace(0, 1, 500)

        # Maximum of m on [0, 1], found by minimising -m.
        minx = fminbound(lambda x: -1 * self.m(x), 0, 1)
        fval = self.m(minx)

        noisemean = UnivariateSpline(self.m(t) / fval, self.s(t) / fval / fval)
        self.se = noisemean(0)

        # Normalised slope of m, variance (offset removed), slope of s.
        a = self.m(t, 1) / fval
        sa = self.s(t) / fval / fval - self.se
        sp = self.s(t, 1) / fval / fval

        fi = 2 * np.power(sa, 2) / (np.power(a, 2) * 2 * sa + np.power(sp, 2))
        fiapp = sa / np.power(a, 2)

        plt.xlim(0, 1)
        plt.ylim(0, ymaxsq)
        print('whop whop')
        plt.plot(t, fi)
        plt.plot(t, fiapp, 'r')
        plt.savefig(self.workdir + "/variance.pdf")

        plt.clf()
        plt.ylim(0, ymax)
        plt.plot(t, np.sqrt(fi))
        plt.plot(t, np.sqrt(fiapp), 'r')
        plt.savefig(self.workdir + "/stddev.pdf")

        plt.clf()
        plt.plot(t, 1 / fi)
        plt.plot(t, 1 / fiapp, 'r')
        plt.savefig(self.workdir + "/fisherInfo.pdf")
class SplineSmoother(object):
    """Smoothing spline built from one flat parameter vector.

    The leading ``xPos`` entries of ``yp`` are mapped through
    ``tanh(2 / scale * value)``, sorted and padded with 0 and 1 to form the
    abscissae; the remaining entries are the ordinates of the spline
    ``self.m``. The plotting methods take the noise-model parameters
    ``s``, ``p`` and ``se`` and use ``s * (p * m**2 + m + se)`` as the
    variance at mean level ``m``. Figures are written into ``workdir``.
    """

    def __init__(self, yp, workdir, scale, sm=200):
        """Split ``yp`` and fit the spline.

        Args:
            yp: Flat sequence of parameters, laid out as described on the
                class.
            workdir: Directory that receives the generated files.
            scale: Divisor of the ``tanh`` argument (``2 / scale``).
            sm: Unused; kept for interface compatibility.
        """
        self.workdir = workdir
        yp = np.array(yp)
        self.l = len(yp)
        # Floor division spelled explicitly: xPos is used as a slice bound.
        self.xPos = (self.l - 2) // 2
        # 2.0: with an integer scale, Python 2 would truncate 2/scale to 0.
        tnsc = 2.0 / scale
        print(tnsc)

        # NOTE: changes matplotlib's global defaults.
        plt.rcParams['font.size'] = 24
        plt.rcParams['lines.linewidth'] = 2.4

        inner = np.sort(np.tanh(tnsc * yp[:self.xPos]))
        self.avx = np.append(np.append([0], inner), [1])
        self.av = yp[self.xPos:]
        self.m = UnivariateSpline(self.avx, self.av)
        print("Created spline with " + str(len(self.m.get_knots())) + " knots")

    def saveSpline(self, filename):
        """Write 1000 ``"x , y"`` samples of the spline on [0, 1].

        Lines are newline-separated with no trailing newline.
        """
        tp = np.linspace(0, 1, 1000)
        rows = [str(x) + " , " + str(self.m(x)) for x in tp]
        with open(filename, "w+") as f:
            f.write("\n".join(rows))

    def showSpline(self, order=0):
        """Print spline details and save a plot of the fit.

        Writes ``splineFit.pdf``. If ``order > 0`` the first derivative (and
        for ``order > 1`` the second derivative) is drawn on top of the same
        figure, which is then saved as ``splineDerivative.pdf``.
        """
        plt.clf()
        print("Spline full information:")
        print(self.m.get_knots())
        print(self.m.get_coeffs())
        print(self.m.get_residual())
        tp = np.linspace(0, 1, 1000)
        plt.scatter(self.avx, self.av)
        plt.plot(tp, self.m(tp))
        plt.savefig(self.workdir + "/splineFit.pdf")

        if order > 0:
            # The figure is not cleared: derivatives overlay the fit.
            plt.plot(tp, self.m(tp, 1))
            if order > 1:
                plt.plot(tp, self.m(tp, 2))
            plt.savefig(self.workdir + "/splineDerivative.pdf")

    def plotSplineData(self, dataContainer, s, p, se, yscale):
        """Plot raw points, the spline and its one-sigma noise band.

        Reads ``dataContainer.points`` and ``dataContainer.background``;
        writes ``splineVsData.pdf``.
        """
        plt.clf()
        plt.xlim(0, 1)
        plt.ylim(0, yscale)
        tp = np.linspace(0, 1, 500)
        background = dataContainer.background
        mean = self.m(tp)
        spread = np.sqrt(s * (p * mean * mean + mean + se))

        plt.scatter(dataContainer.points[0],
                    dataContainer.points[1] + background,
                    c='b', marker='o', s=5)
        plt.plot(tp, mean + background, 'r', linewidth=2)
        plt.plot(tp, mean + spread + background, 'r--', linewidth=2)
        plt.plot(tp, mean - spread + background, 'r--', linewidth=2)
        plt.plot(tp, np.zeros(500) + background, '--', c='#BBBBBB', alpha=0.8)
        plt.savefig(self.workdir + "/splineVsData.pdf")

    def plotBinnedData(self, dataContainer, s, p, se, xmin, xmax):
        """Compare the noise model and the spline with binned statistics.

        Reads ``dataContainer.avs``, ``.stds`` and ``.numBins``; writes
        ``noiseVsBins.pdf`` and ``splineVsBins.pdf``.
        """
        plt.clf()
        t = np.linspace(xmin, xmax, 500)
        plt.xlim(xmin, xmax)
        plt.plot(t, s * (p * t * t + t + se))
        plt.plot(dataContainer.avs, np.power(dataContainer.stds, 2), 'o')
        plt.savefig(self.workdir + "/noiseVsBins.pdf")

        plt.clf()
        tp = np.linspace(0, 1, dataContainer.numBins)
        plt.plot(tp, self.m(tp), 'r', linewidth=2)
        plt.plot(tp, dataContainer.avs, 'o')
        plt.savefig(self.workdir + "/splineVsBins.pdf")

    def plotFisherInfo(self, dataContainer, s, p, se, ymax, ymaxsq):
        """Plot the positional-variance estimates from the noise model.

        The spline is normalised by its maximum on [0, 1] and the noise
        parameters are rescaled to match. Writes ``variance.pdf``,
        ``stddev.pdf``, ``fisherInfo.pdf`` and ``variance.csv`` (the square
        root of the first estimate, 1000 ``"x , y"`` rows, no trailing
        newline). ``dataContainer`` is not used.
        """
        plt.clf()
        t = np.linspace(0, 1, 1000)

        # Maximum of the spline on [0, 1], found by minimising -m.
        minx = fminbound(lambda x: -1 * self.m(x), 0, 1)
        fval = self.m(minx)

        s = s / fval
        se = se / fval
        p = p * fval
        print(" ".join(str(v) for v in (fval, s, p, se)))

        m = self.m(t) / fval
        mp = self.m(t, 1) / fval
        var = s * (p * np.power(m, 2) + m)   # variance at mean level m
        dvar = s * (2 * p * m + 1)           # its derivative w.r.t. m
        fi = 2 * np.power(var, 2) / (
            np.power(mp, 2) * (2 * var + np.power(dvar, 2)))
        fiapp = var / np.power(mp, 2)

        plt.xlim(0, 1)
        plt.ylim(0, ymaxsq)
        plt.plot(t, fi)
        plt.plot(t, fiapp, 'r')
        plt.savefig(self.workdir + "/variance.pdf")

        plt.clf()
        plt.ylim(0, ymax)
        plt.plot(t, np.sqrt(fi))
        plt.plot(t, np.sqrt(fiapp), 'r')
        plt.savefig(self.workdir + "/stddev.pdf")

        plt.clf()
        plt.plot(t, 1 / fi)
        plt.plot(t, 1 / fiapp, 'r')
        plt.savefig(self.workdir + "/fisherInfo.pdf")

        fisher = np.sqrt(fi)
        rows = [str(x) + " , " + str(y) for x, y in zip(t, fisher)]
        with open(self.workdir + "/variance.csv", "w+") as f:
            f.write("\n".join(rows))
class SplineSmoother(object):
    """Smoothing spline built from one flat parameter vector.

    The leading ``xPos`` entries of ``yp`` are mapped through
    ``tanh(2 / scale * value)``, sorted and padded with 0 and 1 to form the
    abscissae; the remaining entries are the ordinates of the spline
    ``self.m``. The plotting methods take the noise-model parameters
    ``s``, ``p`` and ``se`` and use ``s * (p * m**2 + m + se)`` as the
    variance at mean level ``m``. Figures are written into ``workdir``.
    """

    def __init__(self, yp, workdir, scale, sm=200):
        """Split ``yp`` and fit the spline.

        Args:
            yp: Flat sequence of parameters, laid out as described on the
                class.
            workdir: Directory that receives the generated files.
            scale: Divisor of the ``tanh`` argument (``2 / scale``).
            sm: Unused; kept for interface compatibility.
        """
        self.workdir = workdir
        yp = np.array(yp)
        self.l = len(yp)
        # Floor division spelled explicitly: xPos is used as a slice bound.
        self.xPos = (self.l - 2) // 2
        # 2.0: with an integer scale, Python 2 would truncate 2/scale to 0.
        tnsc = 2.0 / scale
        print(tnsc)

        # NOTE: changes matplotlib's global defaults.
        plt.rcParams['font.size'] = 24
        plt.rcParams['lines.linewidth'] = 2.4

        inner = np.sort(np.tanh(tnsc * yp[:self.xPos]))
        self.avx = np.append(np.append([0], inner), [1])
        self.av = yp[self.xPos:]
        self.m = UnivariateSpline(self.avx, self.av)
        print("Created spline with " + str(len(self.m.get_knots())) + " knots")

    def saveSpline(self, filename):
        """Write 1000 ``"x , y"`` samples of the spline on [0, 1].

        Lines are newline-separated with no trailing newline.
        """
        tp = np.linspace(0, 1, 1000)
        rows = [str(x) + " , " + str(self.m(x)) for x in tp]
        with open(filename, "w+") as f:
            f.write("\n".join(rows))

    def showSpline(self, order=0):
        """Print spline details and save a plot of the fit.

        Writes ``splineFit.pdf``. If ``order > 0`` the first derivative (and
        for ``order > 1`` the second derivative) is drawn on top of the same
        figure, which is then saved as ``splineDerivative.pdf``.
        """
        plt.clf()
        print("Spline full information:")
        print(self.m.get_knots())
        print(self.m.get_coeffs())
        print(self.m.get_residual())
        tp = np.linspace(0, 1, 1000)
        plt.scatter(self.avx, self.av)
        plt.plot(tp, self.m(tp))
        plt.savefig(self.workdir + "/splineFit.pdf")

        if order > 0:
            # The figure is not cleared: derivatives overlay the fit.
            plt.plot(tp, self.m(tp, 1))
            if order > 1:
                plt.plot(tp, self.m(tp, 2))
            plt.savefig(self.workdir + "/splineDerivative.pdf")

    def plotSplineData(self, dataContainer, s, p, se, yscale):
        """Plot raw points, the spline and its one-sigma noise band.

        Reads ``dataContainer.points`` and ``dataContainer.background``;
        writes ``splineVsData.pdf``.
        """
        plt.clf()
        plt.xlim(0, 1)
        plt.ylim(0, yscale)
        tp = np.linspace(0, 1, 500)
        background = dataContainer.background
        mean = self.m(tp)
        spread = np.sqrt(s * (p * mean * mean + mean + se))

        plt.scatter(dataContainer.points[0],
                    dataContainer.points[1] + background,
                    c='b', marker='o', s=5)
        plt.plot(tp, mean + background, 'r', linewidth=2)
        plt.plot(tp, mean + spread + background, 'r--', linewidth=2)
        plt.plot(tp, mean - spread + background, 'r--', linewidth=2)
        plt.plot(tp, np.zeros(500) + background, '--', c='#BBBBBB', alpha=0.8)
        plt.savefig(self.workdir + "/splineVsData.pdf")

    def plotBinnedData(self, dataContainer, s, p, se, xmin, xmax):
        """Compare the noise model and the spline with binned statistics.

        Reads ``dataContainer.avs``, ``.stds`` and ``.numBins``; writes
        ``noiseVsBins.pdf`` and ``splineVsBins.pdf``.
        """
        plt.clf()
        t = np.linspace(xmin, xmax, 500)
        plt.xlim(xmin, xmax)
        plt.plot(t, s * (p * t * t + t + se))
        plt.plot(dataContainer.avs, np.power(dataContainer.stds, 2), 'o')
        plt.savefig(self.workdir + "/noiseVsBins.pdf")

        plt.clf()
        tp = np.linspace(0, 1, dataContainer.numBins)
        plt.plot(tp, self.m(tp), 'r', linewidth=2)
        plt.plot(tp, dataContainer.avs, 'o')
        plt.savefig(self.workdir + "/splineVsBins.pdf")

    def plotFisherInfo(self, dataContainer, s, p, se, ymax, ymaxsq):
        """Plot the positional-variance estimates from the noise model.

        The spline is normalised by its maximum on [0, 1] and the noise
        parameters are rescaled to match. Writes ``variance.pdf``,
        ``stddev.pdf``, ``fisherInfo.pdf`` and ``variance.csv`` (the square
        root of the first estimate, 1000 ``"x , y"`` rows, no trailing
        newline). ``dataContainer`` is not used.
        """
        plt.clf()
        t = np.linspace(0, 1, 1000)

        # Maximum of the spline on [0, 1], found by minimising -m.
        minx = fminbound(lambda x: -1 * self.m(x), 0, 1)
        fval = self.m(minx)

        s = s / fval
        se = se / fval
        p = p * fval
        print(" ".join(str(v) for v in (fval, s, p, se)))

        m = self.m(t) / fval
        mp = self.m(t, 1) / fval
        var = s * (p * np.power(m, 2) + m)   # variance at mean level m
        dvar = s * (2 * p * m + 1)           # its derivative w.r.t. m
        fi = 2 * np.power(var, 2) / (
            np.power(mp, 2) * (2 * var + np.power(dvar, 2)))
        fiapp = var / np.power(mp, 2)

        plt.xlim(0, 1)
        plt.ylim(0, ymaxsq)
        plt.plot(t, fi)
        plt.plot(t, fiapp, 'r')
        plt.savefig(self.workdir + "/variance.pdf")

        plt.clf()
        plt.ylim(0, ymax)
        plt.plot(t, np.sqrt(fi))
        plt.plot(t, np.sqrt(fiapp), 'r')
        plt.savefig(self.workdir + "/stddev.pdf")

        plt.clf()
        plt.plot(t, 1 / fi)
        plt.plot(t, 1 / fiapp, 'r')
        plt.savefig(self.workdir + "/fisherInfo.pdf")

        fisher = np.sqrt(fi)
        rows = [str(x) + " , " + str(y) for x, y in zip(t, fisher)]
        with open(self.workdir + "/variance.csv", "w+") as f:
            f.write("\n".join(rows))
def Functional_pval(a_vec, b_vec, a_xvals, b_xvals, d1=None, d2=None):
    """Compare two trends with spline fits and an F-statistic.

    A spline is fitted to each trend separately and a third one to both
    trends pooled together (the null hypothesis that they share one
    underlying smooth trend).

    Args:
        a_vec, b_vec: y-values of the two trends.
        a_xvals, b_xvals: matching x-values (each must be acceptable to
            ``UnivariateSpline`` on its own).
        d1, d2: Degrees of freedom. When both are ``None`` only the
            residuals are returned.

    Returns:
        ``(res_ab, res_a, res_b)`` -- the residual sums of squares of the
        pooled and the two separate fits -- when ``d1`` and ``d2`` are both
        ``None``; otherwise ``stats.f.cdf(F, d2, d1)`` with
        ``F = (d2 / d1) * (res_ab - (res_a + res_b)) / (res_a + res_b)``.
    """
    # Separate fits; each spline object stores its own residual.
    f_a = UnivariateSpline(x=a_xvals, y=a_vec)
    f_b = UnivariateSpline(x=b_xvals, y=b_vec)

    # The pooled x-values must be strictly increasing, so nudge every b
    # abscissa that also occurs in a. A set keeps the lookup O(1).
    a_seen = set(a_xvals)
    pooled_x = list(a_xvals) + [
        x + 0.00001 if x in a_seen else x for x in b_xvals]
    pooled_y = list(a_vec) + list(b_vec)

    # One stable permutation orders x and y together.
    order = sorted(range(len(pooled_x)), key=pooled_x.__getitem__)
    full_range = [pooled_x[i] for i in order]
    result = [pooled_y[i] for i in order]

    # Null-hypothesis fit.
    f_ab = UnivariateSpline(x=full_range, y=result)

    # These are already sums of squared residuals.
    res_a = f_a.get_residual()
    res_b = f_b.get_residual()
    res_ab = f_ab.get_residual()

    # First pass: the caller only wants the residual distributions.
    if d1 is None and d2 is None:
        return res_ab, res_a, res_b

    rss_separate = res_a + res_b
    # Add a little residual if the separate fits are _too_ good. Checked
    # explicitly because numpy floats return inf/nan instead of raising
    # ZeroDivisionError.
    denominator = rss_separate if rss_separate != 0 else rss_separate + 0.001
    # float(): keep d2 / d1 from truncating to 0 for ints under Python 2.
    F = (float(d2) / d1) * (res_ab - rss_separate) / denominator

    return stats.f.cdf(F, d2, d1)
def build_scissors(self, domains, bounds=None, k=3, **kwargs):
    """
    Build a scissors operator by fitting one spline per energy domain to the
    QP corrections, taken as a function of the initial energies E0.

    Args:
        domains: list in the form [[start1, stop1], [start2, stop2], ...].
            Boundaries must be in increasing order and must enclose the
            first and last point of the e0 mesh, otherwise ValueError is
            raised.
        bounds: Forwarded untouched to :class:`Scissors`.
        k: Order of the splines.

    ==============  ==============================================================
    kwargs          Meaning
    ==============  ==============================================================
    plot            If true, use `matplotlib` to compare input data and fit.
    title           Optional figure title (only read when ``plot`` is true).
    ==============  ==============================================================

    Return:
        instance of :class:`Scissors` operator

    When exactly two domains are given, the data point next to the junction
    (last point of the first domain, first point of the second) receives a
    weight of 1000 instead of 1; otherwise the fits are unweighted.

    Usage example:

    .. code-block:: python

        # Build the scissors operator.
        scissors = qplist_spin[0].build_scissors(domains)

        # Compute list of interpolated QP energies.
        qp_enes = [scissors.apply(e0) for e0 in ks_energies]
    """
    # QP corrections ordered by initial KS energy.
    sorted_qps = self.sort_by_e0()
    e0mesh = sorted_qps.get_e0mesh()
    qpcorrs = sorted_qps.get_qpeme0()

    # Validate the domain boundaries.
    domains = np.atleast_2d(domains)
    edges = domains.ravel()
    if domains.size:
        if edges[0] > e0mesh[0]:
            raise ValueError("min(e0mesh) %s is not included in domains" % e0mesh[0])
        for left, right in zip(edges[:-1], edges[1:]):
            if left > right:
                raise ValueError("domain boundaries should be given in increasing order.")
        if edges[-1] < e0mesh[-1]:
            raise ValueError("max(e0mesh) %s is not included in domains" % e0mesh[-1])

    from scipy.interpolate import UnivariateSpline

    # One spline (and its residual) per domain.
    func_list, residues = [], []
    pin_junction = len(domains) == 2
    for position, dom in enumerate(domains):
        first = find_ge(e0mesh, dom[0])
        last = find_le(e0mesh, dom[1])
        window = slice(first, last + 1)
        dom_e0 = e0mesh[window]
        dom_corr = qpcorrs[window]

        # todo check if the number of non degenerate data points > k
        if pin_junction:
            # Heavily weight the point adjacent to the other domain.
            weights = [1] * len(dom_e0)
            weights[-1 if position == 0 else 0] = 1000
        else:
            weights = None

        fit = UnivariateSpline(dom_e0, dom_corr, w=weights, bbox=[None, None], k=k, s=None)
        func_list.append(fit)
        residues.append(fit.get_residual())

    sciss = Scissors(func_list, domains, residues, bounds)

    # Optional visual comparison of the fit with the input data.
    if kwargs.pop("plot", False):
        title = kwargs.pop("title", None)
        import matplotlib.pyplot as plt

        plt.plot(e0mesh, qpcorrs, 'o', label="input data")
        if title:
            plt.suptitle(title)

        # Vertical line at each domain boundary.
        for dom in domains:
            for edge in (dom[0], dom[1]):
                plt.plot(2 * [edge], [min(qpcorrs), max(qpcorrs)])

        intp_qpc = [sciss.apply(e0) for e0 in e0mesh]
        plt.plot(e0mesh, intp_qpc, label="scissor")
        plt.legend(bbox_to_anchor=(0.9, 0.2))
        plt.show()

    return sciss
# NOTE(review): this fragment starts mid-script. X, Y, plt, MSEs_CV and MSE_CV
# come from code outside the visible region, and the first statement may
# belong to a loop whose header is not shown -- confirm against the full file.
MSEs_CV.append(MSE_CV)

# Fit a 5th-order polynomial to (X, Y) and draw it (red) over the data (black).
poly_params = np.polyfit(X, Y, 5)
plt.plot(X, Y, 'black')
plt.plot(X, np.polyval(poly_params, X), 'r-')
plt.show()

# Disabled plotting code, kept as a bare string literal that is never executed.
""" plt.plot(degrees,MSEs_train,'black')#,label="train MSE") plt.plot(degrees,MSEs_CV,'red')#,label="CV MSE") plt.show() """

import numpy as np
from scipy.interpolate import UnivariateSpline

degree = 3 # In range 0 .. 5
# NOTE: `smoothing` is not used below; the spline call hard-codes s=4e9.
smoothing = 1 # Lower bound for SSE

# Fit on the reversed sequences -- presumably because X is stored in
# decreasing order and the spline needs increasing x; TODO confirm.
Xinv = X[::-1]
Yinv = Y[::-1]
s = UnivariateSpline(Xinv, Yinv, k=degree, s=4e9) # available: get_coeffs(), get_knots(), get_residual()

# Data in black, spline evaluated at the sample positions in blue.
plt.plot(Xinv, Yinv, "black")
plt.plot(Xinv, s(Xinv), "blue")
plt.show()

# Report the number of knots and the residual of the fit.
print len(s.get_knots())
print s.get_residual()
def build_scissors(self, domains, bounds=None, k=3, **kwargs):
    """
    Build a scissors operator by fitting one spline per energy domain to the
    real part of the QP corrections, taken as a function of the initial
    energies E0.

    Args:
        domains: list in the form [[start1, stop1], [start2, stop2], ...].
            Boundaries must be in increasing order and must enclose the
            first and last point of the e0 mesh, otherwise ValueError is
            raised.
        bounds: Forwarded untouched to :class:`Scissors`.
        k: Order of the splines.

    ==============  ==============================================================
    kwargs          Meaning
    ==============  ==============================================================
    plot            If true, use `matplotlib` to compare input data and fit.
    title           Optional figure title (only read when ``plot`` is true).
    ==============  ==============================================================

    Return:
        instance of :class:`Scissors` operator

    Usage example:

    .. code-block:: python

        # Build the scissors operator.
        scissors = qplist_spin[0].build_scissors(domains)

        # Compute list of interpolated QP energies.
        qp_enes = [scissors.apply(e0) for e0 in ks_energies]
    """
    # QP corrections ordered by initial KS energy.
    sorted_qps = self.sort_by_e0()
    e0mesh = sorted_qps.get_e0mesh()
    qpcorrs = sorted_qps.get_qpeme0()

    # Validate the domain boundaries.
    domains = np.atleast_2d(domains)
    edges = domains.ravel()
    if domains.size:
        if edges[0] > e0mesh[0]:
            raise ValueError("min(e0mesh) %s is not included in domains" % e0mesh[0])
        for left, right in zip(edges[:-1], edges[1:]):
            if left > right:
                raise ValueError("domain boundaries should be given in increasing order.")
        if edges[-1] < e0mesh[-1]:
            raise ValueError("max(e0mesh) %s is not included in domains" % e0mesh[-1])

    from scipy.interpolate import UnivariateSpline

    # One unweighted spline (and its residual) per domain.
    func_list, residues = [], []
    for dom in domains:
        first = find_ge(e0mesh, dom[0])
        last = find_le(e0mesh, dom[1])
        window = slice(first, last + 1)
        dom_e0 = e0mesh[window]
        dom_corr = qpcorrs[window].real

        # todo check if the number of non degenerate data points > k
        fit = UnivariateSpline(dom_e0, dom_corr, w=None, bbox=[None, None], k=k, s=None)
        func_list.append(fit)
        residues.append(fit.get_residual())

    sciss = Scissors(func_list, domains, residues, bounds)

    # Optional visual comparison of the fit with the input data.
    if kwargs.pop("plot", False):
        title = kwargs.pop("title", None)
        import matplotlib.pyplot as plt

        plt.plot(e0mesh, qpcorrs, 'o', label="input data")
        if title:
            plt.suptitle(title)

        # Vertical line at each domain boundary.
        for dom in domains:
            for edge in (dom[0], dom[1]):
                plt.plot(2 * [edge], [min(qpcorrs), max(qpcorrs)])

        intp_qpc = [sciss.apply(e0) for e0 in e0mesh]
        plt.plot(e0mesh, intp_qpc, label="scissor")
        plt.legend(bbox_to_anchor=(0.9, 0.2))
        plt.show()

    return sciss