def __init__(self, ys, xs, years, model, pr=0.01, smooth=False, anom=False, onethreshold=True, trim=1, debug=False, ConstSxy=True, withshifts=False): try: #print "Pr size", pr, size self.__size=len(xs) #print self.__size, type(self.__size) self.__threshold=bivariate.critTi(pr, self.__size) self.__breakpoints=np.zeros((self.__size,)) self.__breakyears = {} except Exception as e: print bivariate.ExceptionInfo() print "__init__",str(e) raise recurseTestException("__init__:"+str(e)) if smooth: if anom: #print "window smooth anom" txs, tys, td1, td2 = lowess.R().lowess(np.array(xs),np.array(ys), f=1./4., iter=1) self.__ys = np.array(ys)-np.array(tys) self.__xs=np.copy(xs) else: #print "window smooth not anom" txs, tys, td1, td2 = lowess.R().lowess(np.array(xs),np.array(ys), f=1./4., iter=1) self.__ys = np.array(ys) self.__xs=np.array(tys)+np.array(xs) else: #print "window not smooth" self.__ys=np.array(ys) self.__xs=np.copy(xs) #print "window call bivariate" try: #self.__bv=bivariate.bivariate(self.__ys, self.__xs, critical=self.__threshold, anomalise=False,constantsxy=ConstSxy) self.__bv=bivariate.bivariate(self.__ys, self.__xs, critical=None, pr=pr, anomalise=False,constantsxy=ConstSxy) if withshifts: ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs), ap_shifts = self.__bv.allPoints(pr, withshifts=True) else: ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs)= self.__bv.allPoints(pr, withshifts=False) #print "BV.BIVARIATE -> ", len(ap_MaxTis), len(ap_MaxIndexes), len(ap_lows), len(ap_highs) except Exception as e: print bivariate.ExceptionInfo() raise recurseTestException("recurse.__init__ call to bivariate:"+str(e)) #print ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs) if trim == 1: #============================================================================== ## This mode implements a trimming mode whereby breakpoints tested by ## bracketing them and accepting them if there ids a breakpoint in the interval ## The alternative (mode 2) is to replace them with the new breakpoint #============================================================================== try: stepn = 0 if debug: print "trimming" trimmed = True while trimmed: trimmed = False for mi in range(len(ap_MaxIndexes)): if debug: print "considering mi" if not trimmed: stepn = 1 self.__bv.reinit(ap_lows[int(mi)], ap_highs[int(mi)]+1, pr) stepn=2 #print self.__bv.maxnTi() , ap_lows[int(mi)], ap_MaxIndexes[int(mi)], ap_highs[int(mi)], bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]) stepn = 3 if self.__bv.maxnTi() < bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]): stepn = 4 trimmed = True if debug: print "trimmer removed ", mi, ap_MaxIndexes[int(mi)], " between ", ap_lows[int(mi)], ap_highs[int(mi)] stepn = 5 #print mi, len(ap_MaxTis), len(ap_MaxIndexes), len(ap_lows), len(ap_highs) #print ap_MaxTis, " becomes ", pop1 = ap_MaxTis.pop(int(mi)) #print ap_MaxTis pop2 = ap_MaxIndexes.pop(int(mi)) pop3 = ap_lows.pop(int(mi)) pop4 = ap_highs.pop(int(mi)) with open("removals.txt","a") as rmf: rmf.write("from %s trimmer removed element %s (Ti:%s ) %s between %s and %s crit=%s \n" % (str(model), str(mi), str(pop1), str(pop2), str(pop3), str(pop4), str( bivariate.critTi(pr, 1 + ap_highs[min(len(ap_highs)-1,mi)]-ap_lows[min(len(ap_lows)-1,mi)])))) except Exception as e: raise recurseTestException("at trim state "+str(stepn)+" in __init__:"+str(e)) if trim == 2: #============================================================================== ## This mode implements a trimming mode whereby breakpoints tested by ## bracketing them and accepting them if there ids a breakpoint in the interval ## The alternative (mode 2) is to replace them with the new breakpoint #============================================================================== try: stepn = 0 if debug: print "trimming mode 2" trimmed = True while trimmed: trimmed = False for mi in range(len(ap_MaxIndexes)): if debug: print "considering mi" if not trimmed: stepn = 1 self.__bv.reinit(ap_lows[int(mi)], ap_highs[int(mi)]+1, pr) stepn=2 print self.__bv.maxnTi() , ap_lows[int(mi)], ap_MaxIndexes[int(mi)], ap_highs[int(mi)], bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]) if self.__bv.maxnTi() < bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]): trimmed = True if debug: print "trimmer (mode 2) removed ", mi, ap_MaxIndexes[int(mi)], " between ", ap_lows[int(mi)], ap_highs[int(mi)] pop1 = ap_MaxTis.pop(int(mi)) pop2 = ap_MaxIndexes.pop(int(mi)) pop3 = ap_lows.pop(int(mi)) pop4 = ap_highs.pop(int(mi)) with open("removals.txt","a") as rmf: rmf.write("from %s trimmer (mode 2) removed element %s (Ti:%s ) %s between %s and %s crit=%s \n" % (str(model), str(mi), str(pop1), str(pop2), str(pop3), str(pop4), str( bivariate.critTi(pr, 1 + ap_highs[min(len(ap_highs)-1,mi)]-ap_lows[min(len(ap_lows)-1,mi)])))) elif self.__bv.maxIndexnTi() != ap_MaxIndexes[int(mi)]: trimmed = True pop1 = ap_MaxTis[int(mi)] pop2 = ap_MaxIndexes[int(mi)] pop3 = ap_lows[int(mi)] pop4 = ap_highs[int(mi)] ap_MaxTis[int(mi)] = self.__bv.maxnTi() ap_MaxIndexes[int(mi)] = self.__bv.maxIndexnTi() with open("removals.txt","a") as rmf: rmf.write("from %s trimmer (mode 2) substituted element %s (Ti:%s ) %s with %s between %s and %s crit=%s \n" % (str(model), str(mi), str(ap_MaxIndexes[int(mi)]), str(pop1), str(pop2), str(pop3), str(pop4), str( bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)])))) except Exception as e: raise recurseTestException("at mode 2 trim state "+str(stepn)+" in __init__:"+str(e)) try: stepn = 0 for mi in range(len(ap_MaxIndexes)): #print mi stepn = 1 if ((onethreshold and (ap_MaxTis[int(mi)] >= self.__threshold)) or (not onethreshold and (ap_MaxTis[int(mi)] >= bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)])))): self.__breakpoints[int(ap_MaxIndexes[int(mi)])] += 1 stepn = 2 self.__breakyears[years[int(ap_MaxIndexes[int(mi)])]] = None stepn = 3 if withshifts: self.__breakyears[years[int(ap_MaxIndexes[mi])]] = (ap_MaxTis[mi], years[int(ap_lows[mi])], years[int(ap_highs[mi])-1], ap_shifts[mi]) else: self.__breakyears[years[int(ap_MaxIndexes[mi])]] = (ap_MaxTis[mi], years[int(ap_lows[mi])], years[int(ap_highs[mi])-1]) except Exception as e: print "Exception -----------------------------------------------------------" try: print "mi",mi #print "years",years print "ap_MaxIndexes",ap_MaxIndexes print "ap_MaxTis",ap_MaxTis print "ap_lows",ap_lows print "ap_highs",ap_highs print "self.__breakyears",self.__breakyears print "years[int(ap_MaxIndexes[int(mi)])]",years[int(ap_MaxIndexes[int(mi)])] except: pass print bivariate.ExceptionInfo() raise recurseTestException("Reporting loop of __init__: "+str(stepn)+" "+str(e))
def merge_pass(currentbreaks, testdata, controldata, datayears): prevbreaks=copy.copy(currentbreaks) newlist=[prevbreaks.pop(0)] lo=takeClosestIndex(datayears, newlist[0]) print >>tf, "considering",str(prevbreaks) #JHR 13/2/15 Here we consider a prolog and an epilog since it ius necessary to test within the #first and last spans, just to see if something is shielded. PrologDone=False try: if len(currentbreaks) < 2: print "merge_pass.currentbreaks=",currentbreaks if len(prevbreaks) > 1: hi = takeClosestIndex(datayears, prevbreaks[1])+1 else: #JHR SVN 262 15/1/2015 If called with only the end data s bounds this will crash hi = takeClosestIndex(datayears, prevbreaks[0])+1 PrologDone = True #so just prevent doing the same interval twice should we examine a provionally empty span except: print "merge_pass.prevbreaks=",prevbreaks,"merge_pass.currentbreaks=",currentbreaks raise #SVN 280 JHR 13/2/15 Consider firsthi = takeClosestIndex(datayears, prevbreaks[0])+1 statlist = [] state=0 safetystep = 0 Terminating = False #Flag to say we are on last pass while state >= 0: #state < 0 indicates termination #state evaluation loop is always get new list, then depending on state process if state==0: #ythen we have commenced #It should not happen, but was getting exceptions causing hi to be wrongin termination case #JHR 2/3/2015 if Terminating: hi = takeClosestIndex(datayears, currentbreaks[-1])+1 rt1=recursetest.recurse(testdata[lo:hi], controldata[lo:hi], datayears[lo:hi], model, smooth=False, trim = 0, pr=screenpr, anom=False, withshifts=True) candidates= np.sort(rt1.breakyears().keys()).tolist() if Terminating and candidates != [] and trace: print >>tf, "Final pass found ",candidates, " between ", datayears[[lo,hi-1]], "state[0] -> state[1]" if not PrologDone: PrologDone = True #so revert to previous code and commence testing spans rtprolog=recursetest.recurse(testdata[lo:firsthi], controldata[lo:firsthi], datayears[lo:firsthi], model, smooth=False, trim = 0, pr=screenpr, anom=False, withshifts=True) firstcandidates= np.sort(rtprolog.breakyears().keys()).tolist() if firstcandidates != []: if trace: print >>tf, "B4 found ",firstcandidates, " between ", datayears[[lo,firsthi-1]], "state[0] -> state[0]" prevbreaks.insert(0,firstcandidates[0]) hi = firsthi state = 0 #The following can cause deletion of a year, better to just start over #firstcandidates.extend(candidates) #candidates=np.sort(list(set(firstcandidates))).tolist() else: state = 1 else: state = 1 elif state == 1: #the state after we have a new list, candidates needs to be evaluated, firstly how many? if len(candidates) == 0: state = 2 #the state of do a drop elif len(candidates) == 1: state = 3 # the state of process just one else: state = 4 # the state of process multiples elif state == 2: #then our test segment yielded no acceptable break and we will just drop it if trace: if len(prevbreaks) > 1: print >>tf, "** Break ",prevbreaks[1],"no longer between ",datayears[lo], datayears[hi-1], statelabel(state,5) else: print >>tf, "** Break ",prevbreaks,"no longer between ",datayears[lo], datayears[hi-1], statelabel(state,5) state = 5 #the state of a break was dropped by itself (therefore we don't move lo) elif state == 3:# the state of process just one #so now we set up an evaluation of one element. #we rely on candidates, lo, and hi modes=[None,None] ystats, tstats, shiftstats,modes = resample_break(testdata[lo:hi], datayears[lo:hi],N=100,withmode=True) #ystats, tstats, shiftstats = resample_break(testdata[lo:hi], datayears[lo:hi],withmode=False) #now the tstats may mean it is no longer significant #or the ystats may indicate it's too close to the previous #or there may be another issue unthought about #how about the Ti0? crit30=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo]))) #now the retested point may have moved if modes[-2][0] < crit30: #then it is not significant (was tstats[0]) if trace: print >>tf, "-- Candidate ", candidates[0], "(",modes[0][0],")(",modes[-2][0],") did not exceed ", crit30, "between ",datayears[lo], datayears[hi-1], "state[",state,"]" , state= 6 #the state of a break was dropped because it was a problem if trace: print >>tf, "-> state[",state,"]" elif modes[0][0] - (datayears[lo] -1) < minInterval:#JHR This was one year out, last break is datayears[0] -1 #was ystat[0] then it probably moved and in any case is too close to last break if trace: print >>tf, "<< Candidate ", candidates[0], "(",modes[0][0],")(",modes[-2][0],") too close to ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]", state= 5.5 #the state of two peaks close togeter - choose 1 if trace: print >>tf, "-> state[",state,"]" # hold this thought #elif (ystats[0] - 2 * ystats[1] < min(testyr, datayears[lo]) or ystats[0] + 2 * ystats[1] > max(testyr, datayears[hi-1])): #now we consider a bunch of rules about the modality if we have done this elif modes[1] != None: #so there's more than one choice #if the first mode is more than 90% (Roger's verbal rule) the all is OK if modes[0][1] >= 0.9: state = 7 #the state of save the stats and then move on elif modes[0][1]+modes[1][1] < 0.7: #if less than 70% of the values are between two modes then it must be blurry if trace: print >>tf, "~~ Candidate no strong modes ", candidates[0], "(",modes,") less than 70% ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]", state = 6 #the state of a break was dropped because it was a problem if trace: print >>tf, "-> state[",state,"]" elif modes[1][1] <= 0.2: #so at half the time the mode is one value state = 7 #the state of save the stats and then move on elif abs(modes[0][0] - modes[1][0]) <= minInterval /2.0: if trace: print >>tf, "~+ Candidate accepted with two modes ", candidates[0], "(",modes,") < minInterval /2.0 ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]", state = 7 if trace: print >>tf, "-> state[",state,"]" elif abs(modes[0][0] - modes[1][0]) > minInterval: #so for now we will override the candidate list and treat as two candidates _NOT ANY MORE if trace: print >>tf, "~~ Candidate two strong modes - treat as both possible ", candidates[0], "(",modes,") > ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]", #candidates = [min(modes[0][0], modes[1][0])]#, max(modes[0][0], modes[1][0])] state = 9.5 if trace: print >>tf, "-> state[",state,"]" else: if trace: print >>tf, "~~ Candidate two strong but close modes ", candidates[0], "(",modes,") > minInterval /2.0 ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]", state = 7 #This was a definite error - had been dropping these. setting to state 6 if trace: print >>tf, "-> state[",state,"]" #that's basically it for mode processing #however one more thing to check if state == 3: #final pass through if abs(candidates[0] - ystats[0]) > 2.0 * ystats[1]:#resampling shows bad point JHR: 20150107 BAD TEST FIXME! if trace: print >>tf, "<<! Candidate ", candidates[0], "(",ystats,")(",tstats,") too far from ", candidates[0], "between ",datayears[lo], datayears[hi-1] , ". Will replace, state[",state,"]", state= 9 #the state of a break was changed due to resampleing if trace: print >>tf, "-> state[",state,"]" else: state = 7 #the state of save the stats and then move on if state == 7 and trace: if candidates[0] != ystats[0]: print >>tf, "Candidate has moved : modes are " ,modes print >>tf, "=3! Candidate ", candidates[0], "(",ystats,")(",tstats,") OK between ",datayears[lo], datayears[hi-1] , " state[3] - state[7]" elif state == 4: #test the second candidate to see is it's going to be stable. #But we first need to know what the first candidate is. modes1=[None,None] lo1 = takeClosestIndex(datayears, candidates[0])+1 hi1 = takeClosestIndex(datayears, candidates[1])+1 #SO WHAT IS the new lo bound? #If this one is above critical use it, otherwise don't. Use the second candidate which by virtue of how is was generated is likely to persist ystats0, tstats0, shiftstats0,modes0 = resample_break(testdata[lo:hi1], datayears[lo:hi1],N=100,withmode=True) lo0 = takeClosestIndex(datayears, modes0[0][0])+1 #so now we use the new low bound against the proposed upper bound ystats1, tstats1, shiftstats1,modes1 = resample_break(testdata[lo0:hi], datayears[lo0:hi],N=100,withmode=True) crit30a=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo0]))) lowcandidate = candidates[0] if modes1[-2][0] >= crit30a: #then we save that one to use if trace: print >>tf, "!-- Candidate ", candidates[0], " reset to ",modes0[0][0], " after resample gave ",modes0 lowcandidate = modes0[0][0] lo1 = lo0 else: #otherwise need to make a decision with the unchnaged candidate ystats1, tstats1, shiftstats1,modes1 = resample_break(testdata[lo1:hi], datayears[lo1:hi],N=100,withmode=True) crit30a=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo1]))) if modes1[-2][0] < crit30a: #then it is not significant (was tstats[0]) if trace: print >>tf, "!-- Candidate ", candidates[1], "(",modes1[0][0],")(",modes1[-2][0],") did not exceed ", crit30a, "between ",datayears[lo1], datayears[hi-1], " will not use: state[",state,"]" , else: #insert if trace and candidates[1] in prevbreaks: print >>tf, "KNOWN DUPLICATE GOING INTO LIST", candidates[1], prevbreaks, "THIS IS OK" if trace: print >>tf, "++ (new) Candidate ", candidates[0], "will be tested and ", candidates[1],"inserted", prevbreaks, "becomes", prevbreaks.insert(1, candidates[1]) #avoid relooping if trace: print >>tf, prevbreaks, "between ",datayears[lo], datayears[hi-1], "state[",state,"]", #lo=takeClosestIndex(datayears, newlist[-1])+1 if not Terminating: hi = takeClosestIndex(datayears, prevbreaks[1])+1 candidates=[lowcandidate] state = 3 if trace: print >>tf, "-> state[",state,"]" # elif state == 4.1: #Old code # if trace: # print >>tf, "++ (new) Candidate ", candidates[0], "will be tested and ", candidates[1],"inserted", prevbreaks, "becomes", # prevbreaks.insert(1, candidates[1]) #avoid relooping # if trace: # print >>tf, prevbreaks, "between ",datayears[lo], datayears[hi-1], "state[",state,"]", # #lo=takeClosestIndex(datayears, newlist[-1])+1 # hi = takeClosestIndex(datayears, prevbreaks[1])+1 # candidates=[candidates[0]] # state = 3 # if trace: # print >>tf, "-> state[",state,"]" # # elif state == 4.2: #working on # if trace: # print >>tf, "++ (new) Candidate ", candidates[0], "will be tested and ", candidates[1],"inserted", prevbreaks, "becomes", # prevbreaks[0]= candidates[0] # prevbreaks[1]= candidates[1] #avoid relooping # if trace: # print >>tf, prevbreaks, "between ",datayears[lo], datayears[hi-1], "state[",state,"]", # #lo=takeClosestIndex(datayears, newlist[-1])+1 # hi = takeClosestIndex(datayears, prevbreaks[1])+1 # state = 0 # if trace: # print >>tf, "-> state[",state,"]" # elif state == 5: #the state of a break was dropped by itself (therefore we don't move lo) #actually we have to move it a bit to stop a loop if we then get it back - #lo=takeClosestIndex(datayears, newlist[0]) #do the dropping if Terminating: state = 10 # so avoid this processing else: prb=prevbreaks.pop(0) if len(prevbreaks) >1: hi = takeClosestIndex(datayears, prevbreaks[1])+1 lo += minInterval/2 if trace: old = hi-1 print >>tf, "!! Hi bound, ",datayears[old], " to give ",datayears[lo], datayears[hi-1], "state[",state,"]", state = 0 else: if trace: print >>tf, "!! Hi bound, ",datayears[hi-1], " last between",datayears[lo], datayears[hi-1], "state[",state,"]", state = 8 #nothing to do after dropping an upper bound if trace: print >>tf, "-> state[",state,"]" elif state == 5.5: #the state of two peaks close togeter - choose 1 - if we choose the older one just drop this one #if we choose the later 1 delete reference to the prior one #similar code to state 3 #but to prevent a loop we must prevent setting up the same interval again if len(statlist) == 0: state = 7 #accept it if trace: print >>tf, "--!! Candidate ", candidates[0], "must be too close to start, but accept it, state[5.5] -> state[7]" else: if trace: print >>tf, "In state[5.5]" lo1=takeClosestIndex(datayears, newlist[-2])+1 #go back one step ystats, tstats, shiftstats,nmodes = resample_break(testdata[lo1:hi], datayears[lo1:hi], withmode=True) crit30=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo1]))) #now the retested point may have moved if tstats[0] < crit30: #then it is not significant and this sia serious issue since one of these points previously was if trace: print >>tf, "--!! Candidate ", candidates[0], "(",ystats,")(",tstats,") did not exceed ", crit30, "between ",datayears[lo], datayears[hi-1], "state[",state,"]" , state= 5 #the state of a break was dropped because it was a problem if trace: print >>tf, "->!! state[",state,"]" else: if trace: print >>tf, "--!! go back to old break ",newlist[-2], "from ", datayears[lo], " candidate ",candidates[0], "new ", ystats[0], "between ",datayears[lo1], datayears[hi-1],"state[5.5]->", candidates=[round(ystats[0])] if ystats[0] - datayears[lo] < minInterval: #we haven't moved very far, so set up a safety step safetystep = int(abs(ystats[0] - datayears[lo])) if len(statlist) > 0: #if we have already got try: newlist.pop(-1) statlist.pop(-1) #remove old dates state = 7 #the state of save the stats and then move on except: #we must not be able to pop, just delete it print >>tf, "no pop state [5.5] -> " state = 5 pass else: state = 8 #nothing to do after dropping a candiate if trace: print >>tf, "state[",state,"]" #do the dropping #alternative strategy when the new point is too close to the previous, go back and choose the # highest Ti) of the two elif state == 6: #the state of a break was dropped because it was a problem so currently just update the indices past the identified candidate break #relies on candidate[0] if Terminating: state = 10 else: oldlo = lo lo = takeClosestIndex(datayears, candidates[0])+1 if len(prevbreaks) <2: #then cannot go further, we are done. JHR 15/1/2014 SVN262 if trace: print >>tf, "$$ Lo bound ONLY updated to give ",datayears[lo], datayears[hi-1], "state[",state,"]", state = 0 #nothing to do after dropping an upper bound else: #need to see if we can move the upper bound safely retaining the low if len(prevbreaks)>2: hi3 = takeClosestIndex(datayears, prevbreaks[2])+1 else: hi3 = takeClosestIndex(datayears, prevbreaks[1])+1 ystats3, tstats3, shiftstats3,modes3 = resample_break(testdata[oldlo:hi3], datayears[oldlo:hi3],N=100,withmode=True) if modes3[1] == None or modes3[0][1]+modes3[1][1] >= 0.7: hi = hi3 lo = oldlo state = 0 #no more to do but collect another point prb=prevbreaks.pop(0) if len(prevbreaks) >1 and prb == prevbreaks[0]: #JHR 17mar2015 was looping here. prb=prevbreaks.pop(0) #remove the second instance if present to prevent a loop if trace: print >>tf, "$$ Moved high bound, NOW ",datayears[oldlo] , datayears[hi-1], "dropped ", prb, "to give", prevbreaks, "state[",state,"]", #if this now violates the length rule update hi as well elif datayears[hi-1] - datayears[lo] < minInterval: if trace: print >>tf, "$$ Lo bound, ",datayears[oldlo],"too close to Hi to use, cannot apply ",datayears[lo], datayears[hi-1], "state[",state,"]", state = 5#the state of a break was dropped by itself (therefore we don't move lo) else: if trace: print >>tf, "$$ Lo bound, updated to give ",datayears[lo], datayears[hi-1], "state[",state,"]", state = 8 #nothing to do after dropping an upper bound if trace: print >>tf, "-> state[",state,"]" elif state == 7:#the state of save the stats and then move on #we rely on candidates, lo, and hi, ystats, tstats, shiftstats if ystats == None: print "Trouble - this is a debug line " if not (Terminating and abs(currentbreaks[-1] -ystats[0]) < minInterval): statlist.append((ystats, tstats, shiftstats)) newlist.append(int(round(ystats[0]))) #newlist.append(candidates[0]) #Save the actual candidate if not Terminating: lo = takeClosestIndex(datayears, newlist[-1])+1+safetystep if lo > len(datayears): lo -= safetystep safetystep =0 prb=prevbreaks.pop(0) if len(prevbreaks) >1 and prb == prevbreaks[0]: if trace: print >>tf, "Investigate : just popped", prb, prevbreaks, "state[7]" prb=prevbreaks.pop(0) #JHR remove the duplicate as well. 10/2/2015 if len(prevbreaks) >1: hi = takeClosestIndex(datayears, prevbreaks[1])+1 state = 0 #round again else: state = 10 #nothing more to do else: state = 10 if trace: print >>tf, "\\\\", "Saved ",newlist[-1],statlist[-1], "state[7] -> state[",state,"]" elif state == 8: #nothing to do after dropping an upper bound if trace: print >>tf, "-- Update upper bound", datayears[hi-1], "to ", try: prb=prevbreaks.pop(0) except: if trace: print >>tf, "empty list", pass if not Terminating and len(prevbreaks) >1: hi = takeClosestIndex(datayears, prevbreaks[1])+1 if trace: print >>tf,datayears[hi-1], "state[",state,"]", state = 0 #round again else: state = 10 #nothing more to do if trace: print >>tf, "->","state[",state,"]" elif state== 9: #the state of a break was changed due to resampleing if trace: print >>tf, "&& candidate, ",candidates[0], " no longer updated to give ", #candidates[0] = round(ystats[0]) if trace: print >>tf,round(ystats[0]),"state[",state,"]", state = 7 if trace: print >>tf, "-> state[",state,"]" elif state== 9.5: #the state of two strong modes record and exit if trace: print >>tf, "&&-> candidate, ",candidates[0], "updated to give ", if modes[3][0] >= crit30: ystats=(modes[0][0],0.0) tstats=(modes[3][0], 0.0) shiftstats=(modes[3][1],0.0) candidates = [modes[0][0]] if trace: print >>tf, " FIRST MODE ", else: ystats=(modes[1][0],0.0) tstats=(modes[4][0], 0.0) shiftstats=(modes[4][1],0.0) candidates = [modes[1][0]] if trace: print >>tf, " SECOND MODE ", if trace: print >>tf,candidates[0],"state[",state,"]", state = 7 if trace: print >>tf, "-> state[",state,"]" elif state == 10: if Terminating: newlist.append(currentbreaks[-1]) state = -1 else: Terminating = True #lo = takeClosestIndex(datayears, newlist[-1])+1+safetystep #print "State 10 ",lo, hi state = 0 #for last time elif state < 0: print state, newlist return newlist, statlist
def convergentBreaks_Inner(testdata, controldata, datayears, aicControl, model, trace=True, shallow=False, keepFirst=False): ''' This code attempst to iteratively test all ''' #The issue with convergence is that whether a breakpoint is admitted to the yearly breaks is determined #initial analysis over full data if trace: tf=open(TraceFile,"a") rt0=recursetest.recurse(testdata, controldata, datayears, model, smooth=False, trim = 0, pr=0.01, anom=False) oyears = rt0.breakyears() #initial set of breakpoints byears = [datayears[0]] byears.extend([k for k in np.sort(oyears.keys())]) #print "BYEARS", byears #byears=np.insert(byears, 0, datayears[0]) if trace: print >>tf,model, "trace=",trace, "shallow=",shallow for i in range(len(testdata)): print >>tf,i, testdata[i], controldata[i], datayears[i], aicControl[i] byears.append(datayears[-1]) if trace: print >>tf,"BYEARS", byears initialBreaks= copy.copy(byears) low=0 crit30 = bivariate.critTi(0.01, 30) print "initially", np.sort(oyears.keys()), crit30 testedlist = [] statlist = [] newbreaks=[datayears[0]] #the first list start is preserved fails = 0 while len(byears) > 2: popped=byears.pop(0) testedlist.append(popped) lo=np.argwhere(datayears==newbreaks[-1])[0][0] hi=np.argwhere(datayears==byears[1])[0][0]+1 testyr = byears[0] print "try", datayears[lo], datayears[hi-1], rt1=recursetest.recurse(testdata[lo:hi], controldata[lo:hi], datayears[lo:hi], model, smooth=False, trim = 0, pr=0.01, anom=False) print rt1.breakyears().keys() nlist=rt1.breakyears().keys() nlist.sort() if fails > 0: print nlist, "shortened to ",nlist[fails:] nlist = nlist[fails:] lennlist=len(nlist) nlist.extend(byears[1:]) byears=nlist[:] if lennlist == 0: print popped, " gone, revised list empty" else: if lennlist > 1: #then lo and hi need to be computed hi=np.argwhere(datayears==byears[1])[0][0]+1 testyr =byears[0] ystats, tstats = resample_break(testdata[lo:hi], datayears[lo:hi]) print popped, datayears[hi-1],ystats, tstats, byears[0], if tstats[0] + 2 * tstats[1] < crit30: print " **" if lennlist > 0: fails += 1 else: if crit30 > tstats[0]: print " * " else: print " ", if abs(testyr - ystats[0] ) -0.5 <= 2 * ystats[1]: print "YOK", #KeepOldDropNew newbreaks.append(round(ystats[0],0)) statlist.append((ystats, tstats)) fails = 0 else: print " Y* ", #So does the spread of possible break years actually exceed the possible bounds - this is a sign of instability or trend, or badness, or we may already have this point eqarlier if ystats[0] - 2 * ystats[1] < min(testyr, datayears[lo]) or ystats[0] + 2 * ystats[1] > max(testyr, datayears[hi-1]): print" drop", testyr,"likehotpotato" if lennlist > 0: fails += 1 else: byears[0] = round(ystats[0],0) newbreaks.append(byears[0]) statlist.append((ystats, tstats)) print "revised",testyr,"to",byears[0] fails = 0 newbreaks.append(datayears[-1]) print "\nReturning",initialBreaks, "->", newbreaks, statlist return initialBreaks, newbreaks, statlist