def phase(filtered, carf, sampf, tarr, bitclock, baudrate):
    uphasearr = []  # Establishing arrays to hold the entire unfiltered phase
    lphasearr = []  # in both the upper and lower sideband frequencies
    deltaf = 50  # This is determined from baudrate and modulation scheme (MSK)
    # This is the window the phase is calculated and averaged over for a single bit
    # (1/6 of a full bit). This bit phase is in turn averaged over the whole second later on.
    window = 125
    phasebitsize = len(filtered[0]) / window / baudrate  # data points in a bit in phase time series (6)
    rawbitsize = len(filtered[0]) / baudrate  # data points in a bit in raw signal time series (750)
    bins = len(filtered[0]) / window - phasebitsize  # Lose a full bit's worth of data points (6) to start in sync with bitclock
    time = np.array(tarr)  # Copy 'tarr' into a separate 'time' array so the original is left untouched
    for k in range(0, len(filtered)):
        modu = carf[k] + deltaf  # The sideband frequencies used in the
        modl = carf[k] - deltaf  # MSK modulation scheme
        startbin = (np.abs(time - bitclock[k])).argmin()  # Start measuring the phase at start of measured bitclock
        # endbin will be negative so the time series is split evenly into chunks 1/6 of a bit in length
        endbin = startbin - rawbitsize
        uy = filtered[k] * sin(2.0 * pi * modu * time)  # Crunching the phase in segments
        ux = filtered[k] * cos(2.0 * pi * modu * time)  # 1/6 of a bit in length
        uysum = np.split(uy[startbin:endbin], bins)  # Summed over this whole segment for
        uxsum = np.split(ux[startbin:endbin], bins)  # phase measurement
        uphase = -arctan(sum(uysum, axis=1) / sum(uxsum, axis=1))  # a phase for upper and lower sidebands in MSK modulation
        ly = filtered[k] * sin(2.0 * pi * modl * time)  # Crunching the phase in segments
        lx = filtered[k] * cos(2.0 * pi * modl * time)  # 1/6 of a bit in length
        lysum = np.split(ly[startbin:endbin], bins)  # Summed over this whole segment for
        lxsum = np.split(lx[startbin:endbin], bins)  # phase measurement
        lphase = -arctan(sum(lysum, axis=1) / sum(lxsum, axis=1))  # this is the lower sideband's phase
        lphasearr.extend([lphase])  # Adding the arrays of upper phase
        uphasearr.extend([uphase])  # and lower phase for each frequency
    return uphasearr, lphasearr  # Each element in array has 1194 datapoints
def calculateFDunc(self):
    # Calculates the uncertainty of the FFT according to:
    # J. M. Fornies-Marquina, J. Letosa, M. Garcia-Garcia, J. M. Artacho,
    # "Error Propagation for the Transformation of Time Domain into Frequency Domain",
    # IEEE Trans. Magn., Vol. 33, No. 2, March 1997, pp. 1456-1459
    # Assumes that the amplitude of each time sample is statistically independent
    # from the amplitude of the other time samples.

    # Calculates the uncertainty of the real and imaginary part of the FFT and their covariance
    unc_E_real = []
    unc_E_imag = []
    cov = []
    for f in self.getfreqs():
        unc_E_real.append(py.sum((py.cos(2 * py.pi * f * self._tdData.getTimes()) * self._tdData.getUncEX()) ** 2))
        unc_E_imag.append(py.sum((py.sin(2 * py.pi * f * self._tdData.getTimes()) * self._tdData.getUncEX()) ** 2))
        cov.append(-0.5 * py.sum(py.sin(4 * py.pi * f * self._tdData.getTimes()) * self._tdData.getUncEX() ** 2))

    unc_E_real = py.sqrt(py.asarray(unc_E_real))
    unc_E_imag = py.sqrt(py.asarray(unc_E_imag))
    cov = py.asarray(cov)

    # Calculates the uncertainty of the modulus and phase of the FFT
    unc_E_abs = py.sqrt((self.getFReal() ** 2 * unc_E_real ** 2 + self.getFImag() ** 2 * unc_E_imag ** 2 + 2 * self.getFReal() * self.getFImag() * cov) / self.getFAbs() ** 2)
    unc_E_ph = py.sqrt((self.getFImag() ** 2 * unc_E_real ** 2 + self.getFReal() ** 2 * unc_E_imag ** 2 - 2 * self.getFReal() * self.getFImag() * cov) / self.getFAbs() ** 4)

    t = py.column_stack((self.getfreqs(), unc_E_real, unc_E_imag, unc_E_abs, unc_E_ph))
    return self.getcroppedData(t)
def calc_a(x_point, y_point):
    A = pylab.sum(pylab.square(x_point)) * 30
    B = pylab.square(pylab.sum(x_point))
    C = pylab.sum(x_point * y_point) * 30
    D = pylab.sum(x_point) * pylab.sum(y_point)
    a = (C - D) / (A - B)
    print(a)
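# A minimal usage sketch (not from the original source): calc_a fits the least-squares
# slope for exactly 30 points, since the sample count 30 is hardcoded above.
import pylab
x_point = pylab.arange(30, dtype=float)
y_point = 3.0 * x_point + 2.0
calc_a(x_point, y_point)  # prints the fitted slope, exactly 3.0 for this noiseless line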
def likelihood(self, x0, X, Y, U):
    r"""returns the log likelihood of the states `X` and observations `Y`
    under the current model p(X,Y|M)

    Parameters
    ----------
    x0 : matrix
        initial state
    X : list of matrix
        state sequence
    Y : list of matrix
        observation sequence
    U : list of matrix
        input sequence

    Notes
    -----
    This calculates

        p(X,Y|M) = p(x0) \prod_{t=1}^T p(y_t|x_t) \prod_{t=1}^T p(x_t|x_{t-1})

    using the model currently defined in self.
    """
    l1 = pb.sum([pb.log(self.observation_dist(x, y)) for (x, y) in zip(X, Y)])
    l2 = pb.sum([pb.log(self.transition_dist(x, u, xdash))
                 for (x, u, xdash) in zip(X[:-1], U[:-1], X[1:])])
    l3 = self.init_dist(x0)
    l = l1 + l2 + l3
    assert not pb.isinf(l).any(), (l1, l2, l3)
    return l
def FG(self, kList):
    """
    recursive function for the F and G kernels

    kList is a list of numarray vectors
    """
    if len(kList) == 1:
        return (1.0, 1.0)

    sgg = 0.0
    sgf = 0.0
    n = len(kList)
    for m in range(1, n):
        k1 = M.sum(kList[:m])
        k2 = M.sum(kList[m:])
        k1Sqr = M.sum(k1 * k1)
        k2Sqr = M.sum(k2 * k2)
        print k1, k2, k1Sqr, k2Sqr
        if k1Sqr > self.eps and k2Sqr > self.eps:
            locF1, locG1 = self.FG(kList[:m])
            locF2, locG2 = self.FG(kList[m:])
            sgg += locG1 * locG2 * self.beta(k1, k2)
            sgf += locG1 * locF2 * self.alpha(k1, k2)

    return (((2 * n + 1.) * sgf + 2 * sgg) / (2 * n + 3.) / (n - 1.),
            (3 * sgf + 2 * n * sgg) / (2 * n + 3.) / (n - 1.))
def CleanImage(im, bkgr_rad=100):
    '''
    Removes DC background level from image.
    Averages value in lower-right corner of image within user-specified
    radius and subtracts from total image matrix.
    '''
    center = (200, 200)
    im = im.astype(float)
    imrows, imcolumns = pl.shape(im)
    a = pl.arange(imrows * imcolumns)
    imflat = im.flatten()
    bkgrmask = (((divmod(a, imcolumns)[1] - center[1]) ** 2 +
                 (divmod(a, imcolumns)[0] - center[0]) ** 2) < bkgr_rad ** 2)
    immasked = imflat * bkgrmask
    noise = pl.sum(immasked) / pl.sum(bkgrmask)
    print 'sum of background counts:', pl.sum(immasked)
    print 'area of quarter circle:', pl.pi * bkgr_rad ** 2 / 4.
    print 'average noise (in counts):', noise
    im -= noise

    # Bulldozer noise flattener:
    threshold = 200
    im = im * (im > threshold)
    return im
def r_squared(y, estimated):
    """
    Calculate the R-squared error term.

    Args:
        y: 1-d pylab array with length N, representing the y-coordinates of
            the N sample points
        estimated: an 1-d pylab array of values estimated by the regression
            model

    Returns:
        a float for the R-squared error term
    """
    y_mean = pylab.mean(y)
    # R^2 = 1 - sum((y_i - e_i)^2) / sum((y_i - y_mean)^2)
    # first calculate the sum of (y_i - e_i)^2
    y_minus_e = y - estimated            # array of y_i - e_i
    y_minus_e_sq = y_minus_e ** 2        # array of (y_i - e_i)^2
    numerator = pylab.sum(y_minus_e_sq)  # the numerator term sum((y_i - e_i)^2)
    # next calculate the sum of (y_i - y_mean)^2
    y_minus_mean = y - y_mean            # array of y_i - y_mean
    y_minus_mean_sq = y_minus_mean ** 2  # array of (y_i - y_mean)^2
    denom = pylab.sum(y_minus_mean_sq)   # the denominator term sum((y_i - y_mean)^2)
    R_sq = 1 - numerator / denom
    return R_sq
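# A quick sanity check for r_squared (illustrative values only, not from the original):
import pylab
y = pylab.array([1.0, 2.0, 3.0, 4.0])
estimated = pylab.array([1.1, 1.9, 3.2, 3.9])
print(r_squared(y, estimated))  # close to 1.0 for this near-perfect fit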
def computeMomentumSpreading(self):
    spreading = []
    meanPosMomentum = []
    for i in range(len(self.variableArray)):
        profile = self.profileArray[i] / max(self.profileArray[i])
        maxLocation = []
        maxVal = []
        for x0x1 in self.coords:
            x0 = x0x1[0]
            x1 = x0x1[1]
            maxLocation.append(x0 + py.argmax(profile[x0:x1]))
            maxVal.append(max(profile[x0:x1]))
        maxLocation = py.array(maxLocation)
        maxVal = py.array(maxVal)
        zeroIndex = int(len(maxLocation) / 2)
        momentum = (py.array(range(len(profile))) - maxLocation[zeroIndex]) / self.h_d_ratio
        maxLocationMomentum = (py.array(maxLocation) - maxLocation[zeroIndex]) / self.h_d_ratio
        meanPosMomentum.append(py.sum(maxLocationMomentum * maxVal) / py.sum(maxVal))
        spreading.append(py.sum(maxVal * (maxLocationMomentum - meanPosMomentum[-1]) ** 2))
    spreading = py.array(spreading)
    spreading = spreading / max(spreading)
    self.spreading = spreading
    self.meanPosMomentum = meanPosMomentum
def test_good_model(self):
    vars = models.latent_simplex(self.X)

    assert pl.all(pl.sum(vars['pi'].value, 1) <= 1.0), \
        'pi values should sum to at most 1 (%s found)' % pl.sum(vars['pi'].value, 1)

    m = mc.MCMC(vars)
    m.sample(10)
def lin_reg_UU(x, y):
    import numpy as np
    from numpy import arange
    from pylab import plot, legend, xlabel, ylabel
    from pylab import figure, mean, exp, sqrt, sum
    from scipy import stats

    # Based on the "Least Squares Fitting" equations from the MathWorld website.
    # This version checked against data on the Wikipedia "Simple linear regression" page.
    # It also calculates the +/- 1 sigma confidence limits in the regression [xfit, yband].
    # IN THIS VERSION THE YBAND PREDICTION ERRORS ARE CALCULATED
    # ACCORDING TO DAVID PEARSON (AS USED IN COX ET AL., 2013)

    nx = len(x)
    ny = len(y)

    xm = mean(x)
    ym = mean(y)

    x2 = x * x
    y2 = y * y
    xy = x * y

    ssxx = sum(x2) - nx * xm * xm
    ssyy = sum(y2) - ny * ym * ym
    ssxy = sum(xy) - ny * xm * ym

    b = ssxy / ssxx
    a = ym - b * xm

    yf = a + b * x

    r2 = ssxy ** 2 / (ssxx * ssyy)

    e2 = (y - yf) ** 2
    s2 = sum(e2) / (nx - 2)
    s = sqrt(s2)

    da = s * sqrt(1.0 / nx + xm * xm / ssxx)
    db = s / sqrt(ssxx)

    # Calculate confidence limits on fit (see Wikipedia page on "Simple linear regression")
    minx = min(x) - 0.1 * (max(x) - min(x))
    maxx = max(x) + 0.1 * (max(x) - min(x))
    nfit = 200
    dx = (maxx - minx) / nfit
    xfit = minx + dx * arange(0, nfit)
    yfit = a + b * xfit
    yband = np.zeros(nfit)

    # David Pearson's formula for "Prediction Error"
    for n in range(0, nfit):
        yband[n] = sqrt(s2 * (1.0 + 1.0 / nx + (xfit[n] - xm) ** 2 / ssxx))

    return yf, a, b, da, db, xfit, yfit, yband
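# Usage sketch (synthetic data; the values are illustrative, not from the original source):
import numpy as np
x = np.arange(10, dtype=float)
y = 2.0 * x + 1.0 + np.random.normal(0.0, 0.1, len(x))
yf, a, b, da, db, xfit, yfit, yband = lin_reg_UU(x, y)
print(a, b)  # intercept and slope, roughly 1 and 2 for this data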
def alpha(self, k1, k2):
    """
    alpha coupling function

    k1, k2 are numarray vectors
    """
    if M.sum(k1 * k1) < self.eps or M.sum(k2 * k2) < self.eps:
        raise DivergentPTKernelError
    return M.sum((k1 + k2) * k1) / M.sum(k1 * k1)
def rsquared(fitfunc, param, x, y):
    '''
    The R^2 value
    '''
    yhat = fitfunc(param, x)
    ymean = pylab.mean(y)
    ssreg = pylab.sum((yhat - ymean) ** 2)
    sstot = pylab.sum((y - ymean) ** 2)
    return (ssreg / sstot) ** 2
def mean_score_per_attempt(num_attempts, num_attempt_limit, maxscores, first_20_scores):
    # create mask, so we only include people who play more than some number of games
    played_more_than = pb.array(num_attempts) > num_attempt_limit

    # this mask means we only look at people who got roughly equivalent scores on plays 1 and 2
    equate_mask1bot = pb.array(first_20_scores[:, 0] > 7500)
    equate_mask1top = pb.array(first_20_scores[:, 0] < 12500)
    equate_mask2bot = pb.array(first_20_scores[:, 1] > 7500)
    equate_mask2top = pb.array(first_20_scores[:, 1] < 12500)
    equate_mask3bot = pb.array(first_20_scores[:, 2] > 7500)
    equate_mask3top = pb.array(first_20_scores[:, 2] < 12500)
    # can't figure out the all function to do this elegantly
    # equate_mask = equate_mask1bot & equate_mask1top & equate_mask2bot & equate_mask2top & equate_mask3bot & equate_mask3top
    equate_mask = equate_mask1bot & equate_mask1top

    # eligible_mask = equate_mask & played_more_than
    eligible_mask = played_more_than
    print str(sum(eligible_mask)) + ' players in analysis'

    # now rank everyone according to their top score
    # this calculates the boundaries
    percentiles = [stats.scoreatpercentile(maxscores[eligible_mask], per) for per in range(20, 120, 20)]
    # this assigns to percentile (actually quintile) groups
    # NB assignment is to ALL players, but based on boundaries of only those who are eligible
    # non-eligible players get excluded later (because we reuse the eligible mask)
    player_percentiles = [bisect.bisect(percentiles, maxscore) for maxscore in maxscores]

    # this sets up some holding variables for the means
    mean_score_at_attempt_i_for_percentile_j = pb.zeros((20, 5))
    mean_agg_score_at_attempt_i_for_percentile_j = pb.zeros((20, 5))
    count_at_attempt_i_for_percentile_j = pb.zeros((20, 5))
    std_at_attempt_i_for_percentile_j = pb.zeros((20, 5))
    # -------------
    for j in range(5):
        print "looking at percentile group %s" % j
        # this second mask chooses only those players whose max score is in the jth percentile
        scores_in_this_percentile = pb.array(player_percentiles) == j
        # we AND these together to get the mask
        # mask = played_more_than & scores_in_this_percentile & equate_mask
        percentile_mask = scores_in_this_percentile & eligible_mask
        # Then add up all the scores that satisfy the mask (everyone in this percentile who played more than 19 times)
        # and divide them by the number of people who satisfy the mask
        # NOTE THAT WE ASSUME THAT NO ONE SCORES ZERO!
        mean_score_at_attempt_i_for_percentile_j[:, j] = pb.sum(first_20_scores[percentile_mask, :], 0) / sum(first_20_scores[percentile_mask, :] > 0, 0)
        mean_agg_score_at_attempt_i_for_percentile_j[:, j] = pb.sum(agg_scores[percentile_mask, :], 0) / sum(first_20_scores[percentile_mask, :] > 0, 0)
        # tom added this
        count_at_attempt_i_for_percentile_j[:, j] = sum(first_20_scores[percentile_mask, :] > 0, 0)
        # the square root of the average squared difference from the mean
        std_at_attempt_i_for_percentile_j[:, j] = np.sqrt(sum(((first_20_scores[percentile_mask, :] - mean_score_at_attempt_i_for_percentile_j[:, j]) ** 2) * (first_20_scores[percentile_mask, :] > 0), 0) / count_at_attempt_i_for_percentile_j[:, j])

    pickle.dump(count_at_attempt_i_for_percentile_j, open('save_count_at_attempt_i_for_percentile_j.p', 'wb'))
    pickle.dump(std_at_attempt_i_for_percentile_j, open('save_std_at_attempt_i_for_percentile_j.p', 'wb'))
    pickle.dump(mean_agg_score_at_attempt_i_for_percentile_j, open('save_mean_agg_score_at_attempt_i_for_percentile_j.p', 'wb'))

    return mean_score_at_attempt_i_for_percentile_j
def getParamCovMat(prefix, dlogpower=2, theoconstmult=1., dlogfilenames=['dlogpnldloga.dat'],
                   volume=256.**3, startki=0, endki=0, veff=[0.]):
    """
    Calculates parameter covariance matrix from the power spectrum covariance matrix
    and derivative term in the prefix directory.
    """
    nparams = len(dlogfilenames)

    kpnl = M.load(prefix + 'pnl.dat')
    k = kpnl[startki:, 0]
    nk = len(k)
    if endki == 0:
        endki = nk

    pnl = M.array(kpnl[startki:, 1], M.Float64)
    covarwhole = M.load(prefix + 'covar.dat')
    covar = covarwhole[startki:, startki:]
    if len(veff) > 1:
        sqrt_veff = M.sqrt(veff[startki:])
    else:
        sqrt_veff = M.sqrt(volume * M.ones(nk))

    dlogs = M.reshape(M.ones(nparams * nk, M.Float64), (nparams, nk))
    paramFishMat = M.reshape(M.zeros(nparams * nparams * (endki - startki), M.Float64),
                             (nparams, nparams, endki - startki))
    paramCovMat = paramFishMat * 0.

    # Covariance matrices of dlog's
    for param in range(nparams):
        if len(dlogfilenames[param]) > 0:
            dlogs[param, :] = M.load(prefix + dlogfilenames[param])[startki:, 1]

    normcovar = M.zeros(M.shape(covar), M.Float64)
    for i in range(nk):
        normcovar[i, :] = covar[i, :] / (pnl * pnl[i])

    M.save(prefix + 'normcovar.dat', normcovar)

    f = k[1] / k[0]

    if volume == -1.:
        volume = (M.pi / k[0]) ** 3

    # theoconst = volume * k[1]**3 * f**(-1.5) / (12. * M.pi**2)
    # start at ki = 1, not 0, since we're starting at 1
    for ki in range(1, endki - startki):
        for p1 in range(nparams):
            for p2 in range(nparams):
                paramFishMat[p1, p2, ki] = M.sum(M.sum(
                    M.inverse(normcovar[:ki + 1, :ki + 1]) *
                    M.outerproduct(dlogs[p1, :ki + 1] * sqrt_veff[:ki + 1],
                                   dlogs[p2, :ki + 1] * sqrt_veff[:ki + 1])))

        paramCovMat[:, :, ki] = M.inverse(paramFishMat[:, :, ki])

    return k[1:], paramCovMat[:, :, 1:]
def findAreaOfInterest(self):
    """
    Argument :
        - None

    Return :
        - the OD image rotated by the best angle and cropped to the AOI.

    Used to detect the AOI by an edge detection technique.
    """
    OD = self.findNoisyArea()
    max = 0
    bestAngle = None
    for angle in range(-10, 10, 1):
        # the best angle is the one for which the maximum peak is reached
        # on the yProfile (integral along the horizontal direction) ...
        image = rotate(OD, angle)
        yProfile = py.sum(image, axis=1)
        newMax = yProfile.max()
        if newMax > max:
            max = newMax
            bestAngle = angle
    # ... once found, the resulting OD image is kept and used to find
    # the top and bottom bounds by edge detection.
    bestOD = rotate(OD, bestAngle)
    YProfile = py.sum(bestOD, axis=1)
    derivative = py.gradient(YProfile)
    N = 10
    # because the derivative is usually very noisy, a sliding average is
    # performed in order to smooth the signal. This is done by a
    # convolution with a gate function of size "N".
    res = py.convolve(derivative, py.ones((N, )) / N, mode='valid')
    mean = res.mean()
    # index of the maximum value of the signal.
    i = res.argmax()
    while res[i] > mean:
        # walk backwards until res[i] drops to the mean of the derivative:
        # that index marks the upper bound of the AOI.
        i -= 1
    # for security we take an extra 50 pixels.
    y0 = int(i - 50)
    # index of the minimum value of the signal.
    i = res.argmin()
    while res[i] < mean:
        # walk forwards until res[i] rises back to the mean of the derivative:
        # that index marks the lower bound of the AOI.
        i += 1
    # Again, for security, we take an extra 50 pixels.
    y1 = int(i + 50)
    # The horizontal bounds are taken to be maximal, but for security, we
    # take an extra 50 pixels.
    x0 = 50
    x1 = py.shape(OD)[0] - 50
    self.setAreaOfInterest(area=(x0, x1, y0, y1), angle=bestAngle)
    return bestOD[y0:y1, x0:x1]
def r_squared(y, estimated):
    """
    Calculate the R-squared error term.

    Args:
        y: list with length N, representing the y-coords of N sample points
        estimated: a list of values estimated by the regression model

    Returns:
        a float for the R-squared error term
    """
    mean = pylab.mean(y)
    R_sq = 1 - pylab.sum((y - estimated) ** 2) / pylab.sum((y - mean) ** 2)
    return R_sq
def Moving_Avg_Filter(self):
    self.stance_hip_filtered = pl.sum(self.stance_hip_arr, axis=0) / self.num_of_samples_in_buffer
    self.swing_foot_filtered = pl.sum(self.swing_foot_arr, axis=0) / self.num_of_samples_in_buffer
    self.swing_hip_filtered = pl.sum(self.swing_hip_arr, axis=0) / self.num_of_samples_in_buffer
    self.pelvis_m_filtered = pl.sum(self.pelvis_m_arr, axis=0) / self.num_of_samples_in_buffer
    self.com_m_filtered = pl.sum(self.com_m_arr, axis=0) / self.num_of_samples_in_buffer
    return ()
def CreateFromAliFile(self):
    self.LoadAligments(self.AliFile)
    printStr = ''
    self._lst_ignored_files = []
    self.NumFrames = 0
    created_means = False
    for index, (file_name, utterance_id) in \
            enumerate(zip(self.RawFileList, self.UtteranceIds)):
        printStrNew = '\b' * (len(printStr) + 1)
        printStr = "Loading data for utterance #: " + str(index + 1)
        printString = printStrNew + printStr
        print printString,
        sys.stdout.flush()

        data = HTK.ReadHTKWithDeltas(file_name)
        if sum(isnan(data)) != 0 or sum(isinf(data)) != 0:
            self._lst_ignored_files.append(index)
            continue

        if not created_means:
            created_means = True
            self.data_dim = data.shape[0]
            self.__CreateMeansAndStdevs()

        self.DataSumSq += (data ** 2).sum(axis=1).reshape(-1, 1)
        self.DataSum += data.sum(axis=1).reshape(-1, 1)
        self.NumFrames += data.shape[1]

        if self.Utt2Speaker != None:
            speaker = self.Utt2Speaker[utterance_id]
            self.SpeakerMeans[speaker] += data.sum(axis=1).reshape(-1, 1)
            self.SpeakerStds[speaker] += (data ** 2).sum(axis=1).reshape(-1, 1)
            self.SpeakerNumFrames[speaker] += data.shape[1]

    sys.stdout.write("\n")
    for file_num in self._lst_ignored_files:
        sys.stdout.write("File # " + str(file_num) + " was ignored because of errors\n")

    if self.Utt2Speaker != None:
        for speaker in self.Speaker2Utt.keys():
            self.SpeakerMeans[speaker] /= (1.0 * self.SpeakerNumFrames[speaker])
            self.SpeakerStds[speaker] -= self.SpeakerNumFrames[speaker] * (self.SpeakerMeans[speaker] ** 2)
            self.SpeakerStds[speaker] /= (1.0 * self.SpeakerNumFrames[speaker] - 1)
            self.SpeakerStds[speaker][self.SpeakerStds[speaker] < 1e-8] = 1e-8
            self.SpeakerStds[speaker] = sqrt(self.SpeakerStds[speaker])

    self.DataMeanVect = self.DataSum / self.NumFrames
    variances = (self.DataSumSq - self.NumFrames * (self.DataMeanVect ** 2)) / (self.NumFrames - 1)
    variances[variances < 1e-8] = 1e-8
    self.DataStdVect = sqrt(variances)
def getDissim(data, atype, vbose=0, minRank=0, maxRank=50, NPOZ=50):
    ks = data.keys()
    matr = pylab.ones(len(ks) ** 2)
    matr = pylab.reshape(matr, (len(ks), len(ks)))
    scs = []
    names = []
    for ik, k_con in enumerate(ks):
        name = ik
        if not k_con in names:
            names.append(k_con)
        for jk, k_pl in enumerate(ks):
            ss1 = computeSimSc(data, k_con, k_pl, vbose=vbose, minRank=minRank, maxRank=maxRank, NPOZ=NPOZ)
            ss2 = computeSimSc(data, k_pl, k_con, vbose=vbose, minRank=minRank, maxRank=maxRank, NPOZ=NPOZ)

            if atype == 'abs':
                sc1 = sum(ss1)
                sc2 = sum(ss2)
            elif atype == 'rms':
                sc1 = pylab.sqrt(pylab.sum(ss1 ** 2))
                sc2 = pylab.sqrt(pylab.sum(ss2 ** 2))
            elif atype == 'met':
                sc1 = sum(pylab.logical_and(ss1 != 0, True))
                sc2 = sum(pylab.logical_and(ss2 != 0, True))

            if vbose >= 1:
                print 'score for ', k_con, k_pl, ss1, sc1, ss2, sc2

            oldsc = sc1 + sc2
            oldsc *= 0.5

            l1 = len(data[k_con])
            l2 = len(data[k_pl])
            iscale = min(l1, l2)
            nsc = oldsc / (1.0 * iscale)

            if vbose >= 1:
                print k_con, k_pl, 'oldsc', oldsc, l1, l2, iscale, 'nsc', nsc

            matr[ik][jk] = nsc
            if jk <= ik:
                continue
            print nsc, 'xx', ik, k_con, jk, k_pl
            scs.append(nsc)

    return names, pylab.array(scs), matr
def adaptIntPlot(f, a, b):
    """
    Adaptive (doubling partition) integration.
    Minimizes function evaluations at the expense of some tedious array manipulations.
    """
    maxiter = 20
    miniter = 5
    tolerance = 0.1
    maxnx = 2 ** maxiter
    minnx = 2 ** miniter
    x = 0. * M.zeros(maxnx)
    dx = (b - a) / 2.  # **minsteps
    nx = 2
    x[0] = a
    x[1] = a + dx
    integral = M.sum(f(x[1:2])) * dx  # 1 so we don't include the first endpoint
    dx /= 2.
    newintegral = integral / 2. + M.sum(f(x[:nx] + dx)) * dx
    for i in range(nx - 1, -1, -1):
        x[2 * i] = x[i]
        x[2 * i + 1] = x[i] + dx
    nx *= 2
    keepgoing = 1
    while keepgoing == 1:
        integral = newintegral
        dx /= 2.
        eff = f(x[:nx] + dx)
        M.plot(x[:nx] + dx, f(x[:nx] + dx))
        newintegral = integral / 2. + M.sum(eff) * dx  # M.sum(f(x[:nx]+dx))*dx
        print newintegral * nx / (nx - 1)
        for i in range(nx - 1, -1, -1):
            x[2 * i] = x[i]
            x[2 * i + 1] = x[i] + dx
        nx *= 2
        keepgoing = 0
        if integral * newintegral > 0.:
            if ((M.fabs(M.log(integral * (nx / 2) / (nx / 2 - 1) / (newintegral * nx / (nx - 1)))) >
                 tolerance) and (nx < maxnx / 2)) or (nx < minnx):
                keepgoing = 1
        elif integral * newintegral == 0.:
            print "Hmmm, we have a zero integral here. Assuming convergence."
        else:
            keepgoing = 1
    M.show()
    print nx,
    if nx == maxnx / 2:
        print 'No convergence in utils.adaptInt!'
    return newintegral * nx / (nx - 1)
def EM(self, Y, U, eps=0.000000001):
    converged = False
    l = [1000]
    while not converged:
        # E step
        X, P, K, M = self.rtssmooth(Y, U)
        Xi11 = pb.sum([Pt + x * x.T for Pt, x in zip(P, X)], 0)
        Xi10 = pb.sum([Mt + x1 * x.T for Mt, x1, x in zip(M[1:], X[:-1], X[1:])], 0)
        # M step
        self.A = pb.inv(Xi11) * Xi10
        l.append(self.likelihood(self.x0, X, Y, U))
        converged = abs(l[-1] - l[-2]) < eps
        print l[-1]
    return l, X, P
def mingumbel_mle_cfun(p, data):
    """
    Returns the min-Gumbel MLE log cost function (normalized).

    The Gumbel CDF is 1 - exp( -exp( (x - m)/bt ) ).
    """
    m = p[0]
    bt = p[1]
    n = len(data)

    cost = 0
    cost = cost - pylab.log(bt)
    cost = cost + pylab.sum((data - m) / bt) / n
    cost = cost - pylab.sum(pylab.exp((data - m) / bt)) / n

    return -cost
def duxbury_mle_cfun(p, L, data):
    """
    Returns the Duxbury MLE log cost function (normalized).

    The Duxbury CDF is 1 - exp( -(L^2)*exp( -(s/x)^2 ) ).
    """
    s = p[0]
    n = len(data)

    cost = 0
    cost = cost + 2 * pylab.log(s)
    cost = cost - 3 * pylab.sum(pylab.log(data)) / n
    cost = cost - L * L * pylab.sum(pylab.exp(-((s / data) ** 2))) / n
    cost = cost - pylab.sum((s / data) ** 2) / n

    return -cost
def weibull_mle_cfun(p, data):
    """
    Returns the Weibull MLE log cost function (normalized).

    The Weibull CDF is 1 - exp(-(x/l)^k).
    """
    k = p[0]
    l = p[1]
    n = len(data)

    cost = 0
    cost = cost + pylab.log(k / l)
    cost = cost + (k - 1) * pylab.sum(pylab.log(data / l)) / n
    cost = cost - pylab.sum((data / l) ** k) / n

    return -cost
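# Sketch of how such a cost function might be minimized (my assumption, not part of
# the original code): generate synthetic Weibull data and fit (k, l) with fmin.
# A bounded optimizer would be more robust, since fmin may wander into k <= 0.
import numpy as np
from scipy.optimize import fmin
data = 5.0 * np.random.weibull(2.0, size=1000)   # shape k = 2, scale l = 5
k_hat, l_hat = fmin(weibull_mle_cfun, [2.5, 4.0], args=(data,))
print(k_hat, l_hat)                              # should land near (2, 5)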
def groupConfidence(AllData):
    """
    treat all data as group
    """
    correct, confA, confB, RTs = getMatrix(AllData)
    answ = [1.5, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 9.5, 10, 11.5, 12, 13.5, 14.5, 15.5, 16.5]
    hundy = py.ones(len(answ))
    hundy = hundy * 50
    fig = py.figure()
    ax1 = fig.add_subplot(111)

    m, n = py.shape(correct)
    ANS0 = []
    ANS1 = []
    for i in range(m):
        ans0, ans1 = [], []
        for j in range(n):
            ans0.append(confA[i, j])
            ans1.append(confB[i, j])
        ANS0.append(py.sum(ans0))
        ANS1.append(py.sum(ans1))

    # get democratic % correct
    collective = []
    for i in range(len(ANS0)):
        if ANS0[i] > ANS1[i]:
            collective.append(0)
        else:
            collective.append(1)

    right = []
    print(len(collective))
    for i in range(len(answers)):
        if collective[i] == answers[i]:
            right.append(1)
        else:
            right.append(0)
    colmean = py.mean(right)
    print('Collective mean: %.3f' % colmean)

    prange = range(1, 17)
    prange2 = [prange[i] + 0.25 for i in range(len(prange))]
    # print(ANS0, ANS1)

    rects0 = ax1.bar(prange, ANS0, color='b', width=0.35)
    rects1 = ax1.bar(prange2, ANS1, color='r', width=0.35)
    ax1.set_title('Democratic decisions')
    ax1.set_xlabel('Question No.')
    ax1.set_ylabel('Confidence')
    ax1.plot(answ, hundy, 'k*')
def gen_cities_avg(climate, multi_cities, years):
    """
    Compute the average annual temperature over multiple cities.

    Args:
        climate: instance of Climate
        multi_cities: the names of cities we want to average over (list of str)
        years: the range of years of the yearly averaged temperature (list of int)

    Returns:
        a pylab 1-d array of floats with length = len(years). Each element in
        this array corresponds to the average annual temperature over the given
        cities for a given year.
    """
    res = []
    for year in years:
        temperature = []
        for city in multi_cities:
            yearly_temp = pylab.sum(climate.get_yearly_temp(city, year))
            temperature.append(yearly_temp / (366 if (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0) else 365))
        res.append(sum(temperature) / len(multi_cities))
    return pylab.array(res)
def psp_parameter_estimate_fixmem(time, value):
    smoothing_kernel = 10
    smoothed_value = p.convolve(
        value,
        p.ones(smoothing_kernel) / float(smoothing_kernel),
        "same")

    mean_est_part = int(len(value) * .1)
    mean_estimate = p.mean(smoothed_value[-mean_est_part:])
    noise_estimate = p.std(value[-mean_est_part:])

    integral = p.sum(smoothed_value - mean_estimate) * (time[1] - time[0])

    f = 1.

    A_estimate = (max(smoothed_value) - mean_estimate) / (1. / 4.)

    min_A = noise_estimate
    if A_estimate < min_A:
        A_estimate = min_A

    t1_est = integral / A_estimate * f
    t2_est = 2 * t1_est

    tmax_est = time[p.argmax(smoothed_value)] \
        + p.log(t2_est / t1_est) * (t1_est * t2_est) / (t1_est - t2_est)

    return p.array([tmax_est, A_estimate, t1_est, mean_estimate])
def Logistic(self):
    '''Create logistic model from data'''
    tStep = self.time[1] - self.time[0]

    # Time vector for calculating lag phase
    timevec = py.arange(self.time[0], self.time[-1], tStep / 2)

    # Try to find the logistic model with optimal lag phase
    # y = p2 + (A-p2) / (1 + exp(( (um/A) * (L-t) ) + 2))
    sse = 0
    sseF = 0

    # Attempt to use every possible value in the time vector as the lag
    # Choose lag that creates best-fit model
    for idx, lag in enumerate(timevec):
        logDataTemp = [self.startOD +
                       ((self.asymptote - self.startOD) /
                        (1 + py.exp((((self.maxgrowth / self.asymptote) * (lag - t)) + 2))))
                       for t in self.time]
        sse = py.sum([((self.data[i] - logDataTemp[i]) ** 2)
                      for i in xrange(len(self.data) - 1)])
        if idx == 0 or sse < sseF:
            logisticData = logDataTemp
            lagF = lag
            sseF = sse

    return logisticData, lagF, sseF
def exclude_boundary_annotations(_signal, _annotation, _excluded_annotations):
    """
    method removes boundary annotations from the _annotation array and
    corresponding items from the _signal array; returns a named tuple
    SignalNoBoundaryAnnotation which includes the new signal, the new
    annotation array and the indexes of remaining annotation values from the
    _annotation array; which values count as annotations is defined by the
    _excluded_annotations parameter
    """
    if _annotation == None or \
            pl.sum(_annotation, dtype=int) == 0:
        return SignalNoBoundaryAnnotation(_signal, _annotation, None)

    # removing nonsinus beats from the beginning
    while (_annotation[0] != 0 and
           (_excluded_annotations == ALL_ANNOTATIONS or
            _annotation[0] in _excluded_annotations)):
        _signal = _signal[1:]
        _annotation = _annotation[1:]
        if len(_signal) == 0:
            break

    if len(_signal) > 0:
        # removing nonsinus beats from the end
        while (_annotation[-1] != 0 and
               (_excluded_annotations == ALL_ANNOTATIONS or
                _annotation[-1] in _excluded_annotations)):
            _signal = _signal[:-1]
            _annotation = _annotation[:-1]
            if len(_signal) == 0:
                break

    annotation_indexes = get_annotation_indexes(_annotation, _excluded_annotations)
    return SignalNoBoundaryAnnotation(_signal, _annotation, annotation_indexes)
def _filter(self, Y):
    # initialise
    xf = self.x0
    Pf = self.P0
    # filter quantities
    xfStore = []
    PfStore = []

    # calculate the weights
    Wm_i, Wc_i = self.sigma_vectors_weights()

    for y in Y:
        # calculate the sigma points matrix, each column is a sigma vector
        Xi_f_ = self.sigma_vectors(xf, Pf)
        # propagate sigma vectors through the non-linearity
        Xi_f = self.state_equation(Xi_f_)
        # pointwise multiply by weights and sum along y-axis
        xf_ = pb.sum(Wm_i * Xi_f, 1)
        xf_ = xf_.reshape(self.nx, 1)
        # perturbation
        Xi_perturbation = Xi_f - xf_
        weighted_Xi_perturbation = Wc_i * Xi_perturbation
        Pf_ = pb.dot(Xi_perturbation, weighted_Xi_perturbation.T) + self.Sigma_e
        # measurement update equation
        Pyy = dots(self.C, Pf_, self.C.T) + self.Sigma_varepsilon
        Pxy = pb.dot(Pf_, self.C.T)
        K = pb.dot(Pxy, pb.inv(Pyy))
        yf_ = pb.dot(self.C, xf_)
        xf = xf_ + pb.dot(K, (y - yf_))
        Pf = pb.dot((pb.eye(self.nx) - pb.dot(K, self.C)), Pf_)
        xfStore.append(xf)
        PfStore.append(Pf)

    return xfStore, PfStore
def optimize_segment(data, dt, interval_start, ftol=1e-3):
    """
    Estimate the recurrence time of a periodic signal within data.

    data : 1d numpy.ndarray
        the periodic signal
    dt : float
        sampling interval for data
    interval_start : float
        the interval that is taken as initial guess for the optimization
        (in the same unit as dt)
    ftol : float
        tolerance on the solution (passed to scipy.optimize.fmin)
    """
    i = fmin(
        lambda interval: p.sum(
            p.var(
                segment(data, dt, interval),
                axis=0)),
        interval_start,
        ftol=ftol)
    return i[0]
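# Usage sketch, assuming the companion segment(data, dt, interval) helper from the
# same module is in scope (it reshapes the signal into one row per period); the
# signal and period below are made up for illustration.
import pylab as p
dt = 0.001
t = p.arange(0.0, 10.0, dt)
data = p.sin(2 * p.pi * t / 0.75)          # true recurrence time: 0.75 s
print(optimize_segment(data, dt, 0.7))     # should converge near 0.75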
def check(self, _data_vector):
    """
    if there are no annotations, a message is returned
    """
    if _data_vector.annotation == None or \
            pl.sum(_data_vector.annotation, dtype=int) == 0:
        return "No annotations found in signal data !"
def getEazyPz(idx, MAIN_OUTPUT_FILE='photz', OUTPUT_DIRECTORY='./OUTPUT', CACHE_FILE='Same'):
    """
    zgrid, pz = getEazyPz(idx,
                          MAIN_OUTPUT_FILE='photz',
                          OUTPUT_DIRECTORY='./OUTPUT',
                          CACHE_FILE='Same')

    Get Eazy p(z) for object #idx.
    """
    tempfilt, coeffs, temp_seds, pz = readEazyBinary(MAIN_OUTPUT_FILE=MAIN_OUTPUT_FILE,
                                                     OUTPUT_DIRECTORY=OUTPUT_DIRECTORY,
                                                     CACHE_FILE=CACHE_FILE)

    if pz is None:
        return None, None

    ###### Get p(z|m) from prior grid
    kidx = pz['kidx'][idx]
    # print kidx, pz['priorzk'].shape
    if (kidx > 0) & (kidx < pz['priorzk'].shape[1]):
        prior = pz['priorzk'][:, kidx]
    else:
        prior = pylab.ones(pz['NZ'])

    ###### Convert Chi2 to p(z)
    pzi = pylab.exp(-0.5 * (pz['chi2fit'][:, idx] - min(pz['chi2fit'][:, idx]))) * prior
    if pylab.sum(pzi) > 0:
        pzi /= pylab.trapz(pzi, tempfilt['zgrid'])

    ###### Done
    return tempfilt['zgrid'], pzi
def collectAlleles(at, thresh=0.05):
    lineageGrps = at["lineageGrp"].unique()
    at_piv = pd.pivot_table(at, index="cellBC", columns="intBC", values="UMI", aggfunc="count")
    at_piv.fillna(value=0, inplace=True)
    at_piv[at_piv > 0] = 1

    lgs = []

    for i in tqdm(lineageGrps):

        lg = at[at["lineageGrp"] == i]
        cells = lg["cellBC"].unique()

        lg_pivot = at_piv.loc[cells]

        props = lg_pivot.apply(lambda x: pylab.sum(x) / len(x)).to_frame().reset_index()
        props.columns = ["iBC", "prop"]

        props = props.sort_values(by="prop", ascending=False)
        props.index = props["iBC"]

        p_bc = props[(props["prop"] > thresh) & (props["iBC"] != "NC")]

        lg_group = lg.loc[np.in1d(lg["intBC"], p_bc["iBC"])]
        lgs.append(lg_group)

    return lgs
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.input_filename

    f = open(options.input_filename)
    r = csv.DictReader(f)
    orgs = r.fieldnames[1:]

    names = []
    data = []
    for i, row in enumerate(r):
        names.append(row.get('Enzyme Function'))
        a = pylab.array([float(row[k]) for k in orgs])
        data.append(a / 10000.0)

    fig = pylab.figure()
    ytick_formatter = FormatStrFormatter('%d%%')
    m = pylab.array(data).T
    rows, cols = m.shape
    cur_bottom = pylab.zeros(rows)
    left = range(rows)
    colors = ColorMap(range(cols))
    for i in xrange(cols):
        heights = m[:, i]
        pylab.bar(left, heights, bottom=cur_bottom,
                  color=colors[i], width=0.5, align='center',
                  label=names[i])
        cur_bottom += heights

    pylab.ylim((0.0, cur_bottom.max()))
    pcts = pylab.sum(m, 1)
    ticks = ['%s \n%.1f%%' % (o, p) for o, p in zip(orgs, pcts)]
    pylab.xticks(pylab.arange(rows), ticks, ha='center')
    ax = fig.get_axes()
    ax[0].yaxis.set_major_formatter(ytick_formatter)

    pylab.legend()
    pylab.show()
def plotDensityMap(bins, binnedData, xscale='log', yscale='log', normaliseX=True, logScale=True):
    if logScale:
        l, b, r, t = 0.1, 0.12, 1.0, 0.970
    else:
        l, b, r, t = 0.1, 0.12, 1.05, 0.970
    axes_rect = [l, b, r - l, t - b]
    fig = p.figure()
    fig.subplots_adjust(left=0.01, bottom=.05, right=.985, top=.95, wspace=.005, hspace=.05)
    ax = fig.add_axes(axes_rect)
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    X, Y = bins.edge_grids

    if normaliseX:
        ySum = p.sum(binnedData, 1)
        ySum = p.ma.masked_array(ySum, ySum == 0)
        Z = binnedData.transpose() / ySum
    else:
        Z = binnedData.transpose()

    if logScale:
        mappable = ax.pcolor(X, Y, p.ma.array(Z, mask=Z == 0), cmap=p.get_cmap("jet"),
                             norm=matplotlib.colors.LogNorm())
    else:
        mappable = ax.pcolor(X, Y, p.ma.array(Z, mask=Z == 0), cmap=p.get_cmap("jet"))

    markersize = 5.0
    linewidth = 2.0

    fig.colorbar(mappable)

    # ax.set_ylim((T/2.0/bins2D.centers[0][-1], T/2.0*2))
    # ax.set_xlim(bins2D.centers[0][0]*day, bins2D.centers[0][-1]*day)

    return fig, ax
def smooth_gamma(gamma=flat_gamma, knots=knots, tau=smoothing**-2):
    # the following is to include a "noise floor" so that a level value
    # zero prior does not exert undue influence on age pattern smoothing
    gamma = gamma.clip(pl.log(pl.exp(gamma).mean() / 10.), pl.inf)  # only include smoothing on values within 10x of the mean
    return mc.normal_like(pl.sqrt(pl.sum(pl.diff(gamma)**2 / pl.diff(knots))), 0, tau)
def alpha_psp_parameter_estimate(time, value, smoothing_samples=10):
    t1_est_min = time[1] - time[0]

    mean_est_part = int(len(value) * .1)
    mean_estimate = p.mean(value[-mean_est_part:])
    noise_estimate = p.std(value[-mean_est_part:])

    smoothed_value = p.convolve(
        value - mean_estimate,
        p.ones(smoothing_samples) / float(smoothing_samples),
        "same") + mean_estimate

    integral = p.sum(smoothed_value - mean_estimate) * (time[1] - time[0])

    f = 1.

    height_estimate = (max(smoothed_value) - mean_estimate)

    min_height = noise_estimate
    if height_estimate < min_height:
        height_estimate = min_height

    t1_est = integral / height_estimate * f
    if t1_est < t1_est_min:
        t1_est = t1_est_min
    t2_est = 2 * t1_est

    tmax_est = time[p.argmax(smoothed_value)]
    tstart_est = tmax_est + p.log(t2_est / t1_est) \
        * (t1_est * t2_est) / (t1_est - t2_est)

    return p.array(
        [height_estimate, t1_est, t2_est, tstart_est, mean_estimate])
def int(self, integrand):
    """
    Integrates over the second argument of an array
    with Gaussian quadrature weights
    """
    return M.sum(M.outer(1. * M.ones(integrand.shape[0]), self.weights) *
                 integrand, 1)
def computeEvolutionOfOrder(self):
    """
    Argument :
        - None.

    Return :
        - None.

    Evaluate the population of each order for a given optical density image.
    """
    # number of orders
    n = len(self.coords)
    # number of values for each order
    m = len(self.profileArray)
    indexesOfOrder = [i - int(n / 2) for i in range(n)]
    self.orders = {i: py.zeros(m) for i in indexesOfOrder}
    # Compute the integral of each order
    for j, profile in enumerate(self.profileArray):
        normalizeFactor = 0
        for x0x1, i in zip(self.coords, indexesOfOrder):
            x0, x1 = x0x1[0], x0x1[1]
            integral = py.sum(profile[x0:x1], axis=0)
            self.orders[i][j] = integral
            normalizeFactor += integral
        # normalization for the current profile
        for i in indexesOfOrder:
            self.orders[i][j] /= normalizeFactor
def log_likelihood(theta, x, y, xerr, yerr):
    m, b, s = theta
    model = m * x + b
    sigma2 = s**2 + yerr**2 + m**2 * xerr**2
    return -0.5 * pyl.sum(pyl.log(2 * pyl.pi * sigma2) + (y - model)**2 / sigma2)
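# Evaluation sketch (synthetic straight-line data; values are illustrative only):
# theta = (slope m, intercept b, intrinsic scatter s).
import pylab as pyl
x = pyl.linspace(0.0, 10.0, 50)
y = 2.0 * x + 1.0
xerr = 0.1 * pyl.ones_like(x)
yerr = 0.5 * pyl.ones_like(x)
print(log_likelihood((2.0, 1.0, 0.0), x, y, xerr, yerr))  # highest near the true (m, b)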
def get_test_layup2():
    fiber_layup = {}
    th = py.array([1.0] * 6) * 1e-3
    angle = py.array([0.0] * len(th))
    E1 = 40000e6
    E2 = 11500e6
    v12 = 0.3
    G12 = 4500e6
    thickness = py.sum(th)
    th_cur = -thickness / 2.0
    fiber_layup["thickness"] = thickness
    fiber_layup["fiber_nr"] = len(th)
    for i_lay in range(1, len(th) + 1):
        fiber_layup[i_lay] = {}
        fiber_layup[i_lay]["E1"] = E1
        fiber_layup[i_lay]["E2"] = E2
        fiber_layup[i_lay]["nu12"] = v12
        fiber_layup[i_lay]["G12"] = G12
        fiber_layup[i_lay]["angle"] = angle[i_lay - 1] * py.pi / 180
        fiber_layup[i_lay]["thickness"] = th[i_lay - 1]
        fiber_layup[i_lay]["z_start"] = th_cur
        th_cur += th[i_lay - 1]
        fiber_layup[i_lay]["z_end"] = th_cur
    return fiber_layup
def pick_handler(event):
    artist = event.artist
    mouseevent = event.mouseevent
    i = argmin(
        sum((array(artist.get_data()).T -
             array([mouseevent.xdata, mouseevent.ydata]))**2, axis=1))
    x, y = array(artist.get_data()).T[i]
    Nelements = len(artist.get_data()[0])
    d = str(artist.get_url()[i]) if Nelements > 1 else str(artist.get_url())
    ax = artist.axes
    if not hasattr(ax, "annotation"):
        ax.annotation = ax.annotate(
            d,
            xy=(x, y), xycoords='data',
            xytext=(0, 30), textcoords="offset points",
            size="larger",
            va="bottom", ha="center",
            bbox=dict(boxstyle="round", fc="w", alpha=0.5),
            arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.2"),
        )
    ax.annotation.xy = (x, y)
    ax.annotation.set_text(d)
    artist.figure.canvas.draw()
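# Hook-up sketch (an assumption, not from the original source): the handler expects a
# pickable Line2D whose URL holds the per-point labels it annotates.
from pylab import figure, show
fig = figure()
ax = fig.add_subplot(111)
line, = ax.plot([0, 1, 2], [0, 1, 4], "o", picker=5)  # picker: pick radius in points
line.set_url(["point a", "point b", "point c"])       # labels read back via get_url()
fig.canvas.mpl_connect("pick_event", pick_handler)
show()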
def addDataVectorAccessor(self, data_vector_accessor):
    self.__data_vectors_accessors__.append(data_vector_accessor)

    _sum = pl.sum(data_vector_accessor.signal)
    _min = pl.amin(data_vector_accessor.signal)
    _max = pl.amax(data_vector_accessor.signal)

    if self.__minimal_signal__ == None:
        self.__minimal_signal__ = _sum
        self.__minimal_data_vector_accessor__ = data_vector_accessor

        self.__min_signal__ = _min
        self.__max_signal__ = _max

    if _sum < self.__minimal_signal__:
        self.__minimal_data_vector_accessor__ = data_vector_accessor
        self.__minimal_signal__ = _sum

    if _min < self.__min_signal__:
        self.__min_signal__ = _min

    if _max > self.__max_signal__:
        self.__max_signal__ = _max

    # collects unique annotations (>0) as a set
    if not data_vector_accessor.annotation == None:
        unique_annotations = pl.unique(data_vector_accessor.annotation[
            pl.where(data_vector_accessor.annotation > 0)])
        if len(unique_annotations) > 0:
            # union of sets
            self.__unique_annotations__ |= set(unique_annotations)
def calcNewGrowth(logistic, asym, y0):
    """
    Calculate growth level as an adjusted harmonic mean,
    using a logistic model, its asymptote, and its starting OD value
    """
    diff = asym - y0
    return len(logistic) / py.sum((1 / (logistic + diff)))
def calcAUC(data, y0, lag, mgr, asym, time):
    """
    Calculate the area under the curve of the logistic function
    using its integrated formula
    [ A( [A-y0] log[ exp( [4m(l-t)/A]+2 )+1 ]) / 4m ] + At
    """

    # First check that max growth rate is not zero
    # If so, calculate using the data instead of the equation
    if mgr == 0:
        auc = calcAUCData(data, time)
    else:
        timeS = time[0]
        timeE = time[-1]
        t1 = asym - y0
        # try:
        t2_s = py.log(py.exp((4 * mgr * (lag - timeS) / asym) + 2) + 1)
        t2_e = py.log(py.exp((4 * mgr * (lag - timeE) / asym) + 2) + 1)
        # except RuntimeWarning as rw:
        #     # Exponent is too large, setting to 10^3
        #     newexp = 1000
        #     t2_s = py.log(newexp + 1)
        #     t2_e = py.log(newexp + 1)
        t3 = 4 * mgr
        t4_s = asym * timeS
        t4_e = asym * timeE
        start = (asym * (t1 * t2_s) / t3) + t4_s
        end = (asym * (t1 * t2_e) / t3) + t4_e
        auc = end - start

    if py.absolute(auc) == float('Inf'):
        x = py.diff(time)
        auc = py.sum(x * data[1:])

    return auc
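# Smoke-test sketch with made-up logistic parameters (not from the original project);
# with mgr != 0 the closed-form branch is used and `data` only feeds the Inf fallback.
import pylab as py
time = py.arange(0.0, 24.0, 0.5)
data = py.zeros(len(time))
print(calcAUC(data, y0=0.05, lag=4.0, mgr=0.3, asym=1.2, time=time))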
def r_squared(y, estimated):
    """
    Calculate the R-squared error term.

    Args:
        y: 1-d pylab array with length N, representing the y-coordinates of
            the N sample points
        estimated: an 1-d pylab array of values estimated by the regression
            model

    Returns:
        a float for the R-squared error term
    """
    # R^2 = 1 - sum (actual - estimate)^2 / sum (actual - mean)^2
    r_sq = 1 - pylab.sum((y - estimated)**2) / pylab.sum((y - pylab.mean(y))**2)
    return r_sq
def fBM(n, H):
    """
    creates fractional Brownian motion

    parameters: length of sample path; Hurst exponent

    this method uses another computational approach than fBM:
    http://en.wikipedia.org/wiki/Fractional_Brownian_motion#Method_2_of_simulation

    I should look that up at some point with proper references -
    proper scaling is not implemented. Look up the article to implement it!
    """
    gammaH = gamma(H + .5)

    def KH(t, s):
        """ according to the article """
        return (t - s)**(H - .5) / gammaH * hyp2f1(H - .5, .5 - H, H + .5, 1. - float(t) / float(s))

    incs = randn(n + 1)
    path = zeros(n + 1)
    for pos in arange(n) + 1:
        path[pos] = sum([KH(pos, x) * incs[x] for x in arange(pos) + 1])
    return path[1:]
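# Usage sketch, assuming the imports the function relies on (gamma and hyp2f1 from
# scipy.special, randn/zeros/arange/sum from pylab) are in scope; the parameters are
# illustrative. Note the nested sum over KH makes long paths slow.
from pylab import plot, show
path = fBM(200, 0.7)   # 200 steps, Hurst exponent H = 0.7
plot(path)
show()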
def mean_score_per_attempt(num_attempts, num_attempt_limit, maxscores, first_20_scores):
    played_more_than = pb.array(num_attempts) > num_attempt_limit

    percentiles = [stats.scoreatpercentile(maxscores[played_more_than], per) for per in range(20, 120, 20)]
    player_percentiles = [bisect.bisect(percentiles, maxscore) for maxscore in maxscores]

    mean_score_at_attempt_i_for_percentile_j = pb.zeros((20, 5))
    count_at_attempt_i_for_percentile_j = pb.zeros((20, 5))
    std_at_attempt_i_for_percentile_j = pb.zeros((20, 5))

    for j in range(5):
        print "looking at percentile group %s" % j
        # this second mask chooses only those players whose max score is in the jth percentile
        scores_in_this_percentile = pb.array(player_percentiles) == j
        # we AND these together to get the mask
        mask = played_more_than & scores_in_this_percentile
        # Then add up all the scores that satisfy the mask (everyone in this percentile who played more than 19 times)
        # and divide them by the number of people who satisfy the mask
        # NOTE THAT WE ASSUME THAT NO ONE SCORES ZERO!
        mean_score_at_attempt_i_for_percentile_j[:, j] = pb.sum(first_20_scores[mask, :], 0) / sum(first_20_scores[mask, :] > 0, 0)
        # tom added this
        count_at_attempt_i_for_percentile_j[:, j] = sum(first_20_scores[mask, :] > 0, 0)
        # the square root of the average squared difference from the mean
        std_at_attempt_i_for_percentile_j[:, j] = np.sqrt(sum(((first_20_scores[mask, :] - mean_score_at_attempt_i_for_percentile_j[:, j]) ** 2) * (first_20_scores[mask, :] > 0), 0) / count_at_attempt_i_for_percentile_j[:, j])

    pickle.dump(count_at_attempt_i_for_percentile_j, open('save_count_at_attempt_i_for_percentile_j.p', 'wb'))
    pickle.dump(std_at_attempt_i_for_percentile_j, open('save_std_at_attempt_i_for_percentile_j.p', 'wb'))

    return mean_score_at_attempt_i_for_percentile_j
def __calculate__(self):
    global USE_IDENTITY_LINE
    sd1 = (self.signal_plus - self.signal_minus) / pl.sqrt(2)
    if USE_IDENTITY_LINE:
        return pl.sqrt(pl.sum((sd1**2)) / len(self.signal_plus))
    else:
        return pl.sqrt(pl.var(sd1))
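# Standalone sketch of the same SD1 statistic for an RR-interval series (my reading:
# signal_plus/signal_minus are the Poincare-plot coordinates RR_n and RR_{n+1}).
import pylab as pl
rr = pl.array([800.0, 810.0, 790.0, 805.0, 795.0])      # illustrative RR intervals (ms)
sd1 = pl.sqrt(pl.var((rr[:-1] - rr[1:]) / pl.sqrt(2)))  # short-term HRV descriptor
print(sd1)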
def classify(self, x):
    d = self.X - tile(x.reshape(self.n, 1), self.N)
    dsq = sum(d * d, 0)
    # Get the k nearest neighbors
    minindex = np.argsort(dsq)[0:self.k]
    # Majority vote among the k neighbors (group counts by class label)
    return Counter(self.c[minindex]).most_common()[0][0]
def findNoisyArea(self):
    """
    Argument :
        - None

    Return :
        - the first OD image (the noisy-area bounds are stored via setNoisyArea).

    Used to detect the noisy area (the area without atoms) by an edge
    detection technique.
    """
    OD = self.computeFirstOD()
    yProfile = py.sum(OD, axis=1)
    derivative = py.gradient(yProfile)
    N = 10
    # because the derivative is usually very noisy, a sliding average is
    # performed in order to smooth the signal. This is done by a
    # convolution with a gate function of size "N".
    res = py.convolve(derivative, py.ones((N, )) / N, mode='valid')
    mean = res.mean()
    # index of the maximum value of the signal.
    i = res.argmax()
    while res[i] >= mean:
        # walk backwards until res[i] drops below the mean of the derivative:
        # that index marks the upper bound of the noisy area.
        i -= 1
    # for security we take an extra 50 pixels.
    upBound = i - 50
    # index of the minimum value of the signal.
    i = res.argmin()
    while res[i] < mean:
        # walk forwards until res[i] rises to the mean of the derivative:
        # that index marks the lower bound of the noisy area.
        i += 1
    downBound = i + 50
    self.setNoisyArea((upBound, downBound))
    return OD