def shuffle(self, brknumber, offset=0, span=6):
    """Run the shuffle test across the boundary between two adjacent segments.

    The data either side of break ``brknumber`` come from segments
    ``brknumber - 1`` (lower) and ``brknumber`` (upper) of the stored x, y
    and year segments.

    * ``span != 0``: only the last ``span`` values of the lower segment and
      the first ``span`` values of the upper segment are used, and the
      position handed to ``shuffle.shuffle_stat`` is ``span + offset``.
    * ``span == 0``: both segments are used whole and the position is
      ``len(lower x segment) + offset``.

    Returns a 9-tuple::

        (stat, bins[yrs[0]:], yrs, mode,
         norm fit of stat[0], norm fit of stat[1], norm fit of stat[2],
         bv.maxTi(), bv.stepChange())

    where ``stat`` is the result of ``shuffle.shuffle_stat``, ``bins`` is
    ``np.bincount(stat[0])``, ``mode`` is the index of its largest bin and
    ``bv`` is a bivariate test of the joined y values against the joined x
    values.
    """
    below = brknumber - 1
    above = brknumber

    if span != 0:
        # A window of `span` points either side of the boundary.
        tail = slice(-span, None)
        head = slice(None, span)
        position = span + offset
    else:
        # -0 cannot express "the last zero points", so span == 0 means
        # "use both segments in full".
        tail = head = slice(None)
        position = len(self.__xsegs[below]) + offset

    def joined(segments):
        # Lower-segment window followed by upper-segment window, as a list.
        merged = list(segments[below][tail])
        merged.extend(segments[above][head])
        return merged

    xs = joined(self.__xsegs)
    ys = joined(self.__ysegs)
    yrs = joined(self.__yearsegs)

    stat = shuffle.shuffle_stat(xs, ys, yrs, position)
    bins = np.bincount(stat[0])
    mode = bins.argmax()
    bv = bivariate.bivariate(ys, xs, anomalise=False, pr=0.01)

    year_fit = stats.norm.fit(stat[0])
    ti_fit = stats.norm.fit(stat[1])
    shift_fit = stats.norm.fit(stat[2])
    return (stat, bins[yrs[0]:], yrs, mode,
            year_fit, ti_fit, shift_fit,
            bv.maxTi(), bv.stepChange())
def resample_break(testdata, datayears, N=30):
    """Repeat the bivariate test of ``testdata`` against fresh random controls.

    For each of ``N`` trials a control series of ``random.random()`` values
    (one per entry of ``datayears``) is generated and a bivariate test is run
    with ``anomalise=False, pr=0.01``.  The trial's ``maxTi()`` value and the
    year found at ``datayears[maxIndexTi()]`` are recorded.

    Returns ``(stats.norm.fit(break years), stats.norm.fit(Ti values))``.
    """
    def one_trial():
        # One random control series, one test; Ti is read before the index.
        control = np.array([random.random() for _year in datayears])
        result = bivariate.bivariate(testdata, control, anomalise=False, pr=0.01)
        ti = result.maxTi()
        return datayears[result.maxIndexTi()], ti

    trials = [one_trial() for _trial in range(N)]
    break_years = [year for year, _ti in trials]
    ti_values = [ti for _year, ti in trials]
    return stats.norm.fit(break_years), stats.norm.fit(ti_values)
def resample_break(testdata, datayears, N=30, withmode=False):
    """Repeat the bivariate test against random controls and summarise it.

    For each of ``N`` trials a control series of ``random.random()`` values
    (one per entry of ``datayears``) is tested against ``testdata`` with
    ``anomalise=False, pr=0.01``.  Each trial records ``maxTi()``, the year
    ``datayears[maxIndexTi()]`` and ``stepChange()``.

    Parameters
    ----------
    testdata, datayears : sequences passed to / indexed by the test.
    N : number of resampling trials.
    withmode : when true, summarise only the trials that hit the most
        frequent break year and report the two most frequent years.

    Returns
    -------
    withmode false:
        ``(norm fit of years, norm fit of Tis, norm fit of shifts)`` over all
        trials.
    withmode true:
        ``(fit of years, fit of Tis, fit of shifts, info)`` where the fits
        use only the trials whose year equals the modal year and ``info`` is
        ``[(first, share), (second, share), yearfreqs[-len(datayears):],
        (mean Ti, mean shift) at first, (mean Ti, mean shift) at second]``.
        When no second year was found, entries 1, 2 and 4 are ``None``.

    Raises
    ------
    Whatever the trial loop raises; the message and the last completed step
    number are printed first and the original exception is re-raised.
    """
    brks = []
    tis = []
    shifts = []
    # Bound before the try so the handler can always report a step, even when
    # the failure happens before the first trial starts (e.g. a bad N).
    step = -1
    try:
        for i in range(N):
            step = 0
            controldata = np.array([random.random() for y in datayears])
            step = 1
            bv = bivariate.bivariate(testdata, controldata, anomalise=False, pr=0.01)
            step = 2
            tis.append(bv.maxTi())
            step = 3
            brks.append(datayears[bv.maxIndexTi()])
            step = 4
            shifts.append(bv.stepChange())
            step = 5
    except Exception as e:
        print("%s %s" % (str(e), step))
        raise  # bare raise keeps the original traceback

    if not withmode:
        return stats.norm.fit(brks), stats.norm.fit(tis), stats.norm.fit(shifts)

    brks = np.array(brks)
    tis = np.array(tis)
    shifts = np.array(shifts)

    # np.bincount only accepts integers; years read from a data file are
    # often floats, so cast for counting only (brks keeps its dtype).
    yearfreqs = np.bincount(brks.astype(int))

    # Find the most frequent and second most frequent break years.  Ties are
    # resolved in favour of the earlier year (strict comparisons).  A value
    # of 0 for `second` means "no second year found".
    first = second = firstval = secondval = 0
    # max(0, ...) stops the scan starting at -1, which would wrap round to
    # the last bin.
    for i in range(max(0, int(min(brks)) - 1), int(max(brks)) + 1):
        if yearfreqs[i] > firstval:
            second = first
            first = i
            firstval = yearfreqs[first]
            secondval = yearfreqs[second]
        elif yearfreqs[i] > secondval:
            second = i
            secondval = yearfreqs[second]

    # Mean Ti and shift over the trials that landed on each modal year.
    mask1 = np.where(brks == first)
    timean1 = np.mean(tis[mask1])
    shmean1 = np.mean(shifts[mask1])
    fits = (stats.norm.fit(brks[mask1]),
            stats.norm.fit(tis[mask1]),
            stats.norm.fit(shifts[mask1]))

    if second == 0:
        info = [(first, float(firstval) / N), None, None,
                (timean1, shmean1), None]
    else:
        mask2 = np.where(brks == second)
        timean2 = np.mean(tis[mask2])
        shmean2 = np.mean(shifts[mask2])
        info = [(first, float(firstval) / float(N)),
                (second, float(secondval) / float(N)),
                yearfreqs[-len(datayears):],
                (timean1, shmean1),
                (timean2, shmean2)]
    return fits[0], fits[1], fits[2], info
def classify(ys, years, Year, window=10, span=2):
    """Score how consistently a break is located near ``Year``.

    ``Year`` is looked up in ``years`` to get a position.  For every shift in
    ``-span .. span`` a slice of ``ys`` running from ``window`` points before
    the shifted position (clamped at 0) to ``window`` points after it is
    tested repeatedly against random control data, and the index reported by
    ``maxIndexTi()`` is tallied at its position in the full series.
    ``int(100 / (2*span + 1))`` tests are run per shift.

    Returns the largest tally divided by the total number of tallies.
    Raises ``ValueError`` when ``Year`` is not in ``years``.
    """
    tallies = np.zeros(np.shape(ys))
    centre = list(years).index(Year)
    # TODO (from the original author): consider the case window - span < 0.
    trials_per_shift = int(100/(2*span+1))

    for shift in range(-span, span+1):
        start = max(0, centre - window + shift)
        stop = min(len(ys) + 1, centre + window + shift)
        segment = ys[start:stop]
        trial = 0
        while trial < trials_per_shift:
            noise = np.array([random.random() for _value in segment])
            test = bivariate.bivariate(segment, noise, anomalise=False, pr=0.01)
            # maxIndexTi() is relative to the slice; offset it back.
            tallies[start + test.maxIndexTi()] += 1
            trial += 1

    return np.max(tallies)/np.sum(tallies)
def shuffle_stat(xs1, ys1, Years, pos, iterations=100):
    '''
    Re-run the bivariate test on data shuffled separately on each side of a
    candidate break position.

    xs1 is the control series and ys1 the test series; both are copied so the
    caller's sequences are not touched by the shuffles.
    Years is indexed with the position reported by each test.
    pos is the location being tested (limited to len(xs1)); values up to and
    including pos form the low side, the rest the high side.
    iterations is the number of shuffle-and-test rounds.

    Returns (years at maximum Ti, maximum Ti values, step changes), one entry
    per iteration.
    '''
    if len(xs1) == 0:
        print("oops")

    # Work on copies: np.random.shuffle operates in place.
    control = copy.copy(xs1)
    test = copy.copy(ys1)
    split = min(pos, len(control)) + 1

    years_at_max = []
    max_tis = []
    step_changes = []
    for _round in range(iterations):
        low_control, high_control = control[:split], control[split:]
        low_test, high_test = test[:split], test[split:]
        # Shuffle order matters for reproducibility with a seeded generator.
        for part in (low_control, low_test, high_control, high_test):
            np.random.shuffle(part)
        shuffled_control = np.append(low_control, high_control)
        shuffled_test = np.append(low_test, high_test)
        bv = bivariate.bivariate(shuffled_test, shuffled_control,
                                 anomalise=False, constantsxy=True)
        try:
            ti = bv.maxTi()
            year = Years[bv.maxIndexTi()]
            shift = bv.stepChange()
        except Exception as e:
            print(str(e))
            raise
        max_tis.append(ti)
        years_at_max.append(year)
        step_changes.append(shift)
    return years_at_max, max_tis, step_changes
Created on Tue Feb 24 15:27:36 2015 @author: s4493222 """
# Scratch experiment: run the bivariate test on synthetic series and print
# where the maximum Ti falls.  Everything below executes at import time.
import numpy as np
import bivariate_multi as bivariate
import random
SVNRevision="$Revision: 308 $"
# Signature of bivariate.bivariate kept here for reference:
#self, rawdata, xs, anomalise=True, step=1, averagesteps=False, critical=None, pr= None, window = 5, constantsxy=True):
# Test series: uniform noise plus a linear ramp of i/100 over 100 points.
ys=np.array([random.random() + i/100. for i in range(100)])
# Control series: uniform noise only.
xs=np.array([random.random() for i in range(100)])
bv=bivariate.bivariate(ys, xs, pr=0.05, anomalise=False, window=1)
print "Trended",bv.maxIndexTi(), bv.maxTi()
# Placeholder; overwritten by the np.array assignment below.
ys2=ys
# Alternative synthetic series tried previously:
#ys2=np.array([random.random() + 0.1 + int(i >49) * 0.1 for i in range(100)])
#ys2=np.array([random.random() + 0.1 + int(i >49) * 0.1 + i/100. for i in range(100)])
#ys2=np.array([random.random() + 0.1 + int(i >49) * 0.1 + i/10. for i in range(100)])
# Active series: noise + 0.1, a 0.1 step for i > 49, and an extra i/100 term
# only where abs(i-49) < 3 (i.e. i = 47..51).
ys2=np.array([random.random() + 0.1 + int(i >49) * 0.1 + (i/100.) * int(abs(i-49) <3) for i in range(100)])
#ys2=ys
bv=bivariate.bivariate(ys2, xs, pr=0.05, anomalise=False, window=1)
print bv.maxIndexTi(), bv.maxTi()
# Index of the maximum Ti for the second series, kept for later use.
loc=bv.maxIndexTi()
# Driver script: load a CSV of yearly data, test one column against a random
# control series and print the break-point results.
# First five fields of the current UTC time tuple (year-month-day-hour-minute).
timestring="%d-%d-%d-%d-%d" % datetime.datetime.timetuple(datetime.datetime.utcnow())[0:5]
prewhiten=False
# Input file selection: each assignment overrides the previous one, so only
# the LAST value of fn is used.  The earlier ones are kept as a record of
# data sets used before.  Requires the HOMEPATH environment variable.
fn=os.environ["HOMEPATH"]+"\\Documents\\ReferenceData\\Rogers_Analysis_17Jul2014\\Data 4 Jim\\GISSTEMPto6-2013\\GISSTEMPto6-2013b.csv"
fn=os.environ["HOMEPATH"]+"\\Documents\\abrupt\\SpatialBPs\\Qld_191001-201312.csv"
fn=os.environ["HOMEPATH"]+"\\Documents\\ReferenceData\\Rogers_Analysis_17Jul2014\\Data 4 Jim\\GISSTEMPto6-2013\\GISSTEMPto6-2013b.csv"
# NOTE(review): unescaped backslashes; "\U" makes this literal a syntax error
# on Python 3 -- confirm the target interpreter before porting.
fn="C:\Users\s4493222\Documents\CourseWork(ROP8001&2)\Confirmation\IllustrativeData.csv"
fn=os.environ["HOMEPATH"]+"\\Documents\\ReferenceData\\GISS\\gisstemp\\tabledata_v3\\GLB.Ts+dSST.csv"
TraceFile=fn+".trace"
trace=True
#fn=os.environ["HOMEPATH"]+"\\Documents\\ReferenceData\\BOM\\rutherglen_82039.csv"
#fn = tests17Aug.fn
# names=True: columns are addressed by their header names below.
data=np.genfromtxt(fn,delimiter=",",names=True,filling_values =np.NaN, skiprows=0) #fill with NaNs so must use their bounds
# Test series: the "JD" column (other columns tried are left commented out).
ys=data["JD"]
#ys=data["DeWobble"]
# ys=data["24SEQU"]
#ys=data["Mint"]
Years=data["Year"]
#xs=data["Maxt"]
# Control series: one uniform random value per observation.
xs = np.array([random.random() for i in range(len(ys))])
if prewhiten:
    # Optional pre-whitening of the test series; modules are imported only
    # when needed.
    import STARS
    import whitening
    ys=whitening.prewhiten(ys,STARS.AlphaEst(ys, 15, option="optIPN4", returnmsgs=False))
print xs, ys
# convergentBreaks is defined elsewhere in this file.
print convergentBreaks(ys, xs, Years, "64N90N", mode="control", guide="AIC",trace=trace)
# Single bivariate test over the whole series for comparison.
bv=bivariate.bivariate(ys, xs, anomalise=False)
print bv.maxTi(), bv.maxIndexTi()
print bv.allPoints(0.05)
def __init__(self, ys, xs, years, model, pr=0.01, smooth=False, anom=False, onethreshold=True, trim=1, debug=False, ConstSxy=True, withshifts=False):
    """Find, trim and record the break-points of ``ys`` tested against ``xs``.

    Parameters
    ----------
    ys, xs : test and control series.
    years : sequence indexed by break position to label each break-point.
    model : label written to the ``removals.txt`` log.
    pr : probability passed to ``bivariate.critTi`` / the bivariate test.
    smooth, anom : with ``smooth`` a lowess fit ``tys`` is computed; if
        ``anom`` it is subtracted from ``ys``, otherwise it is added to
        ``xs``.  Without ``smooth`` the series are used as given.
    onethreshold : accept candidates against the whole-series threshold
        (true) or against the threshold for each candidate's own interval.
    trim : 1 - drop candidates whose interval, re-tested alone, has no
        significant break; 2 - as 1, and additionally replace a candidate
        whose re-test puts the break at a different index.  Any other value
        disables trimming.
    debug : print progress messages while trimming.
    ConstSxy : forwarded to the bivariate test as ``constantsxy``.
    withshifts : also request and store the shift of each break-point.

    Side effects
    ------------
    Appends one line per removal/substitution to ``removals.txt`` in the
    current directory.

    Raises
    ------
    recurseTestException : wrapping any failure, with the stage (and step
        number where relevant) in the message.
    """
    try:
        self.__size = len(xs)
        self.__threshold = bivariate.critTi(pr, self.__size)
        self.__breakpoints = np.zeros((self.__size,))
        self.__breakyears = {}
    except Exception as e:
        print(bivariate.ExceptionInfo())
        print("__init__ %s" % str(e))
        raise recurseTestException("__init__:" + str(e))

    if smooth:
        txs, tys, td1, td2 = lowess.R().lowess(np.array(xs), np.array(ys), f=1./4., iter=1)
        if anom:
            # Remove the smooth fit from the test series.
            self.__ys = np.array(ys) - np.array(tys)
            self.__xs = np.copy(xs)
        else:
            # Add the smooth fit to the control series instead.
            self.__ys = np.array(ys)
            self.__xs = np.array(tys) + np.array(xs)
    else:
        self.__ys = np.array(ys)
        self.__xs = np.copy(xs)

    try:
        self.__bv = bivariate.bivariate(self.__ys, self.__xs, critical=None, pr=pr, anomalise=False, constantsxy=ConstSxy)
        if withshifts:
            ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs), ap_shifts = self.__bv.allPoints(pr, withshifts=True)
            # List copy so shifts can be trimmed in step with the other lists.
            ap_shifts = list(ap_shifts)
        else:
            ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs) = self.__bv.allPoints(pr, withshifts=False)
    except Exception as e:
        print(bivariate.ExceptionInfo())
        raise recurseTestException("recurse.__init__ call to bivariate:" + str(e))

    if trim in (1, 2):
        # Each candidate is re-tested on its own bracketing interval
        # [low, high].  Mode 1 only removes candidates that fail; mode 2
        # also substitutes the re-tested break when it lands elsewhere.
        # After every change the scan restarts from the first candidate.
        tag = "" if trim == 1 else " (mode 2)"
        stepn = 0
        try:
            if debug:
                print("trimming" if trim == 1 else "trimming mode 2")
            trimmed = True
            while trimmed:
                trimmed = False
                for mi in range(len(ap_MaxIndexes)):
                    if debug:
                        print("considering mi")
                    stepn = 1
                    low = ap_lows[mi]
                    high = ap_highs[mi]
                    self.__bv.reinit(low, high + 1, pr)
                    stepn = 2
                    # Critical value for THIS interval, computed before any
                    # list is modified so the log line reports the value the
                    # candidate was actually tested against.
                    crit = bivariate.critTi(pr, 1 + high - low)
                    retest_ti = self.__bv.maxnTi()
                    if debug and trim == 2:
                        print("%s %s %s %s %s" % (retest_ti, low, ap_MaxIndexes[mi], high, crit))
                    stepn = 3
                    if retest_ti < crit:
                        stepn = 4
                        if debug:
                            print("trimmer%s removed  %s %s  between  %s %s" % (tag, mi, ap_MaxIndexes[mi], low, high))
                        stepn = 5
                        old_ti = ap_MaxTis.pop(mi)
                        old_index = ap_MaxIndexes.pop(mi)
                        ap_lows.pop(mi)
                        ap_highs.pop(mi)
                        if withshifts:
                            # Keep shifts aligned with the break-points.
                            ap_shifts.pop(mi)
                        with open("removals.txt", "a") as rmf:
                            rmf.write("from %s trimmer%s removed element %s (Ti:%s ) %s between %s and %s crit=%s \n" % (str(model), tag, str(mi), str(old_ti), str(old_index), str(low), str(high), str(crit)))
                        trimmed = True
                        break
                    if trim == 2 and self.__bv.maxIndexnTi() != ap_MaxIndexes[mi]:
                        old_ti = ap_MaxTis[mi]
                        old_index = ap_MaxIndexes[mi]
                        ap_MaxTis[mi] = retest_ti
                        ap_MaxIndexes[mi] = self.__bv.maxIndexnTi()
                        # NOTE(review): ap_shifts[mi] still holds the shift of
                        # the replaced break-point -- confirm whether it
                        # should be recomputed here.
                        with open("removals.txt", "a") as rmf:
                            rmf.write("from %s trimmer (mode 2) substituted element %s (Ti:%s ) %s with %s between %s and %s crit=%s \n" % (str(model), str(mi), str(old_ti), str(old_index), str(ap_MaxIndexes[mi]), str(low), str(high), str(crit)))
                        trimmed = True
                        break
        except Exception as e:
            where = "at trim state " if trim == 1 else "at mode 2 trim state "
            raise recurseTestException(where + str(stepn) + " in __init__:" + str(e))

    # Record every surviving candidate that meets the chosen threshold.
    stepn = 0
    try:
        for mi in range(len(ap_MaxIndexes)):
            stepn = 1
            if onethreshold:
                limit = self.__threshold
            else:
                limit = bivariate.critTi(pr, 1 + ap_highs[mi] - ap_lows[mi])
            if ap_MaxTis[mi] >= limit:
                index = int(ap_MaxIndexes[mi])
                self.__breakpoints[index] += 1
                stepn = 2
                year = years[index]
                stepn = 3
                record = (ap_MaxTis[mi], years[int(ap_lows[mi])], years[int(ap_highs[mi]) - 1])
                if withshifts:
                    record = record + (ap_shifts[mi],)
                self.__breakyears[year] = record
    except Exception as e:
        print("Exception -----------------------------------------------------------")
        try:
            # Best-effort dump of the loop state; any failure here must not
            # hide the original error.
            print("mi %s" % (mi,))
            print("ap_MaxIndexes %s" % (ap_MaxIndexes,))
            print("ap_MaxTis %s" % (ap_MaxTis,))
            print("ap_lows %s" % (ap_lows,))
            print("ap_highs %s" % (ap_highs,))
            print("self.__breakyears %s" % (self.__breakyears,))
            print("years[int(ap_MaxIndexes[int(mi)])] %s" % (years[int(ap_MaxIndexes[int(mi)])],))
        except Exception:
            pass
        print(bivariate.ExceptionInfo())
        raise recurseTestException("Reporting loop of __init__: " + str(stepn) + " " + str(e))
@author: James """
#classify_breaks
import numpy as np;
import bivariate_multi as bivariate
import random
def classify(ys, years, Year, window=10, span=2):
    """Score how consistently a break is located near ``Year``.

    ``Year`` is looked up in ``years``; for each shift in ``-span .. span`` a
    slice of ``ys`` around the shifted position is tested repeatedly against
    random control data and the reported break index is tallied.

    Returns the largest tally divided by the total number of tallies.
    Raises ValueError if ``Year`` is not present in ``years``.
    """
    spins=range(-span, span+1)
    # One counter per element of ys.
    counts=np.zeros(np.shape(ys))
    pos=list(years).index(Year)
    # if #need to think about what if window - span < 0
    for i in spins:
        # Window of +/- `window` points around the shifted position; the low
        # end is clamped at 0 (slicing tolerates a high end past the data).
        lo=max(0, pos-window+i)
        hi=min(len(ys)+1, pos+window+i)
        yslice=ys[lo:hi]
        # yrslice=years[lo:hi]
        # slpos=list(yrslice).index(Year)
        # Roughly 100 tests in total, shared equally between the shifts.
        for j in range(int(100/(2*span+1))):
            bv=bivariate.bivariate(yslice, np.array([random.random() for y in yslice]), anomalise=False, pr=0.01) #(testdata,controldata, anomalise=False, pr=0.01)
            bvpos=bv.maxIndexTi()
            # bvpos is relative to the slice; lo maps it back onto ys.
            counts[lo+bvpos]+=1
    return np.max(counts)/np.sum(counts)
if __name__ == "__main__":
    # Demo: a 0 -> 1 step at index 10 on top of a rising series.
    ys=np.array([0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1])
    ys=ys+np.array([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9])
    years=np.array([1900+i for i in range(len(ys))])
    # Locate the break once, then classify the year it falls on.
    bv1=bivariate.bivariate(ys, np.array([random.random() for y in ys]), anomalise=False, pr=0.01)
    print classify(ys, years, years[bv1.maxIndexTi()])