Beispiel #1
0
  def __init__(self, ys, xs, years, model, pr=0.01, smooth=False, anom=False, onethreshold=True, trim=1, debug=False, ConstSxy=True, withshifts=False):
    try:
      #print "Pr size", pr, size
      self.__size=len(xs)
      #print self.__size, type(self.__size)
      self.__threshold=bivariate.critTi(pr, self.__size)
      self.__breakpoints=np.zeros((self.__size,))
      self.__breakyears = {}
    except Exception as e:
      print bivariate.ExceptionInfo()
      print "__init__",str(e)
      raise recurseTestException("__init__:"+str(e))
      
    if smooth:
      if anom:
        #print "window smooth anom"
        txs, tys, td1, td2 = lowess.R().lowess(np.array(xs),np.array(ys), f=1./4., iter=1)
        self.__ys = np.array(ys)-np.array(tys)      
        self.__xs=np.copy(xs)
      else:
        #print "window smooth not anom"
        txs, tys, td1, td2 = lowess.R().lowess(np.array(xs),np.array(ys), f=1./4., iter=1)
        self.__ys = np.array(ys)              
        self.__xs=np.array(tys)+np.array(xs)
    else:
      #print "window not smooth"
      self.__ys=np.array(ys)
      self.__xs=np.copy(xs)
    #print "window call bivariate"
    try:
      #self.__bv=bivariate.bivariate(self.__ys, self.__xs, critical=self.__threshold, anomalise=False,constantsxy=ConstSxy)
      self.__bv=bivariate.bivariate(self.__ys, self.__xs, critical=None, pr=pr, anomalise=False,constantsxy=ConstSxy)
      if withshifts:
        ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs), ap_shifts = self.__bv.allPoints(pr, withshifts=True)
      else:
        ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs)= self.__bv.allPoints(pr, withshifts=False)
      #print "BV.BIVARIATE -> ", len(ap_MaxTis), len(ap_MaxIndexes), len(ap_lows), len(ap_highs)
    except Exception as e:
      print bivariate.ExceptionInfo()
      raise recurseTestException("recurse.__init__ call to bivariate:"+str(e))  
    #print ap_MaxTis, ap_MaxIndexes, (ap_lows, ap_highs)
    
    
    if trim == 1:  
#==============================================================================
##       This mode implements a trimming mode whereby breakpoints tested by
##    bracketing them and accepting them if there ids a breakpoint in the interval
##    The alternative (mode 2) is to replace them with the new breakpoint    
#==============================================================================
    
      try:
        stepn = 0
        if debug: print "trimming"
        trimmed = True
        while trimmed:
          trimmed = False
          for mi in range(len(ap_MaxIndexes)):
            if debug: print "considering mi"
            if not trimmed:
              stepn = 1
              self.__bv.reinit(ap_lows[int(mi)], ap_highs[int(mi)]+1, pr)
              stepn=2
              #print self.__bv.maxnTi() , ap_lows[int(mi)], ap_MaxIndexes[int(mi)], ap_highs[int(mi)], bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)])
              stepn = 3
              if self.__bv.maxnTi() < bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]):
                stepn = 4
                trimmed = True
                if debug: print "trimmer removed ", mi, ap_MaxIndexes[int(mi)], " between ", ap_lows[int(mi)], ap_highs[int(mi)]
                stepn = 5
                #print mi, len(ap_MaxTis), len(ap_MaxIndexes), len(ap_lows), len(ap_highs)
                #print ap_MaxTis, " becomes ", 
                pop1 = ap_MaxTis.pop(int(mi))
                #print ap_MaxTis
                pop2 = ap_MaxIndexes.pop(int(mi))
                pop3 = ap_lows.pop(int(mi))
                pop4 = ap_highs.pop(int(mi))
                with open("removals.txt","a") as rmf:
                  rmf.write("from %s trimmer removed element %s (Ti:%s ) %s between %s and %s crit=%s \n" % (str(model), str(mi), str(pop1), str(pop2), str(pop3), str(pop4), str( bivariate.critTi(pr, 1 + ap_highs[min(len(ap_highs)-1,mi)]-ap_lows[min(len(ap_lows)-1,mi)]))))
      except Exception as e:
        raise recurseTestException("at trim state "+str(stepn)+" in __init__:"+str(e))

    if trim == 2:  
#==============================================================================
##       This mode implements a trimming mode whereby breakpoints tested by
##    bracketing them and accepting them if there ids a breakpoint in the interval
##    The alternative (mode 2) is to replace them with the new breakpoint    
#==============================================================================
    
      try:
        stepn = 0
        if debug: print "trimming mode 2"
        trimmed = True
        while trimmed:
          trimmed = False
          for mi in range(len(ap_MaxIndexes)):
            if debug: print "considering mi"
            if not trimmed:
              stepn = 1
              self.__bv.reinit(ap_lows[int(mi)], ap_highs[int(mi)]+1, pr)
              stepn=2
              print self.__bv.maxnTi() , ap_lows[int(mi)], ap_MaxIndexes[int(mi)], ap_highs[int(mi)], bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)])
              if self.__bv.maxnTi() < bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]):  
                trimmed = True
                if debug: print "trimmer (mode 2) removed ", mi, ap_MaxIndexes[int(mi)], " between ", ap_lows[int(mi)], ap_highs[int(mi)]
                pop1 = ap_MaxTis.pop(int(mi))
                pop2 = ap_MaxIndexes.pop(int(mi))
                pop3 = ap_lows.pop(int(mi))
                pop4 = ap_highs.pop(int(mi))
                with open("removals.txt","a") as rmf:
                  rmf.write("from %s trimmer (mode 2) removed element %s (Ti:%s ) %s between %s and %s crit=%s \n" % (str(model), str(mi), str(pop1), str(pop2), str(pop3), str(pop4), str( bivariate.critTi(pr, 1 + ap_highs[min(len(ap_highs)-1,mi)]-ap_lows[min(len(ap_lows)-1,mi)]))))
              elif self.__bv.maxIndexnTi() != ap_MaxIndexes[int(mi)]:
                trimmed = True
                pop1 = ap_MaxTis[int(mi)]
                pop2 = ap_MaxIndexes[int(mi)]
                pop3 = ap_lows[int(mi)]
                pop4 = ap_highs[int(mi)]
                ap_MaxTis[int(mi)] = self.__bv.maxnTi()               
                ap_MaxIndexes[int(mi)] = self.__bv.maxIndexnTi()
                with open("removals.txt","a") as rmf:
                  rmf.write("from %s trimmer (mode 2) substituted element %s (Ti:%s ) %s with %s between %s and %s crit=%s \n" % (str(model), str(mi), str(ap_MaxIndexes[int(mi)]), str(pop1), str(pop2), str(pop3), str(pop4), str( bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)]))))
                
      except Exception as e:
        raise recurseTestException("at mode 2 trim state "+str(stepn)+" in __init__:"+str(e))

    try:  
      stepn = 0
      for mi in range(len(ap_MaxIndexes)):
        #print mi
        stepn = 1
        if ((onethreshold and (ap_MaxTis[int(mi)] >= self.__threshold)) or 
            (not onethreshold and (ap_MaxTis[int(mi)] >= 
                                  bivariate.critTi(pr, 1 + ap_highs[int(mi)]-ap_lows[int(mi)])))):
          self.__breakpoints[int(ap_MaxIndexes[int(mi)])] += 1
          stepn = 2
          self.__breakyears[years[int(ap_MaxIndexes[int(mi)])]] = None
          stepn = 3
          if withshifts:
            self.__breakyears[years[int(ap_MaxIndexes[mi])]] = (ap_MaxTis[mi], years[int(ap_lows[mi])], years[int(ap_highs[mi])-1], ap_shifts[mi])
          else:
            self.__breakyears[years[int(ap_MaxIndexes[mi])]] = (ap_MaxTis[mi], years[int(ap_lows[mi])], years[int(ap_highs[mi])-1])
          
    except Exception as e:
      print "Exception -----------------------------------------------------------"
      try:
        print "mi",mi
        #print "years",years
        print "ap_MaxIndexes",ap_MaxIndexes
        print "ap_MaxTis",ap_MaxTis
        print "ap_lows",ap_lows
        print "ap_highs",ap_highs
        print "self.__breakyears",self.__breakyears
        print "years[int(ap_MaxIndexes[int(mi)])]",years[int(ap_MaxIndexes[int(mi)])]
      except:
        pass
      print bivariate.ExceptionInfo()
      raise recurseTestException("Reporting loop of __init__: "+str(stepn)+" "+str(e))
  def merge_pass(currentbreaks, testdata, controldata, datayears):
    # Re-test a list of break years span by span and return the revised list.
    #
    # currentbreaks -- list of years; its first element seeds the result and its
    #                  last element is appended to the result on termination.
    # testdata, controldata, datayears -- sequences sliced with the same [lo:hi]
    #                  index window; datayears is also indexed with a list of
    #                  positions in the trace output.
    # Returns (newlist, statlist): the accepted break years and, for every saved
    # break, the tuple (ystats, tstats, shiftstats).
    #
    # The function reads tf, trace, model, pr, screenpr, minInterval, statelabel,
    # takeClosestIndex and resample_break from an enclosing scope that is not
    # visible in this part of the file.
    #
    # State overview (value of `state` inside the loop):
    #   0    run the recurse test on [lo:hi] (plus a one-off prolog on [lo:firsthi])
    #   1    dispatch on the number of candidates -> 2, 3 or 4
    #   2    no candidate found -> 5
    #   3    evaluate a single candidate by resampling -> 5.5, 6, 7, 9 or 9.5
    #   4    several candidates: reduce to one (possibly inserting the second) -> 3
    #   5    drop the current break -> 0, 8 or 10
    #   5.5  candidate too close to the lower bound -> 5, 7 or 8
    #   6    candidate rejected as a problem -> 0, 5, 8 or 10
    #   7    save the statistics and advance -> 0 or 10
    #   8    drop an upper bound -> 0 or 10
    #   9    candidate differs from the resampled position -> 7
    #   9.5  two strong modes: pick one -> 7
    #   10   first visit starts the final pass (-> 0); second visit ends (-> -1)
    prevbreaks=copy.copy(currentbreaks)
    newlist=[prevbreaks.pop(0)]
    lo=takeClosestIndex(datayears, newlist[0])
    # NOTE(review): this line writes to tf regardless of the trace flag.
    print >>tf, "considering",str(prevbreaks)
    #JHR 13/2/15 Here we consider a prolog and an epilog since it is necessary to test within the
    #first and last spans, just to see if something is shielded.
    PrologDone=False

    try:
      if len(currentbreaks) < 2:
        print "merge_pass.currentbreaks=",currentbreaks
      if len(prevbreaks) > 1:
        hi = takeClosestIndex(datayears, prevbreaks[1])+1
      else: #JHR SVN 262 15/1/2015 If called with only the end dates as bounds this would crash
        hi = takeClosestIndex(datayears, prevbreaks[0])+1
        PrologDone = True #so just prevent doing the same interval twice should we examine a provisionally empty span
    except:
      print "merge_pass.prevbreaks=",prevbreaks,"merge_pass.currentbreaks=",currentbreaks
      raise
    #SVN 280 JHR 13/2/15 Upper index of the first span, used once by the prolog test
    firsthi = takeClosestIndex(datayears, prevbreaks[0])+1

    statlist = []
    state=0
    safetystep = 0
    Terminating = False #Flag to say we are on last pass
    while state >= 0: #state < 0 indicates termination

      #state evaluation loop is always get new list, then depending on state process
      if state==0: #then we have commenced
        #It should not happen, but was getting exceptions causing hi to be wrong in the termination case
        #JHR 2/3/2015
        if Terminating:
          hi = takeClosestIndex(datayears, currentbreaks[-1])+1
        rt1=recursetest.recurse(testdata[lo:hi], controldata[lo:hi], datayears[lo:hi], model, smooth=False, trim = 0, pr=screenpr, anom=False, withshifts=True)
        candidates= np.sort(rt1.breakyears().keys()).tolist()
        if Terminating and candidates != [] and trace:
          print >>tf, "Final pass found ",candidates, " between ", datayears[[lo,hi-1]], "state[0] -> state[1]"
        if not PrologDone:
          PrologDone = True #so revert to previous code and commence testing spans
          rtprolog=recursetest.recurse(testdata[lo:firsthi], controldata[lo:firsthi], datayears[lo:firsthi], model, smooth=False, trim = 0, pr=screenpr, anom=False, withshifts=True)
          firstcandidates= np.sort(rtprolog.breakyears().keys()).tolist()
          if firstcandidates != []:
            if trace:
              print >>tf, "B4 found ",firstcandidates, " between ", datayears[[lo,firsthi-1]], "state[0] -> state[0]"
            prevbreaks.insert(0,firstcandidates[0])
            hi = firsthi
            state = 0
            #The following can cause deletion of a year, better to just start over
            #firstcandidates.extend(candidates)
            #candidates=np.sort(list(set(firstcandidates))).tolist()
          else:
            state = 1
        else:
          state = 1

      elif state == 1: #the state after we have a new list, candidates needs to be evaluated, firstly how many?
        if len(candidates) == 0:
          state = 2 #the state of do a drop
        elif len(candidates) == 1:
          state = 3 # the state of process just one
        else:
          state = 4 # the state of process multiples

      elif state == 2: #then our test segment yielded no acceptable break and we will just drop it
        if trace:
          if len(prevbreaks) > 1:
            print >>tf, "** Break ",prevbreaks[1],"no longer between ",datayears[lo], datayears[hi-1], statelabel(state,5)
          else:
            print >>tf, "** Break ",prevbreaks,"no longer between ",datayears[lo], datayears[hi-1], statelabel(state,5)
        state = 5 #the state of a break was dropped by itself (therefore we don't move lo)

      elif state == 3:# the state of process just one
        #so now we set up an evaluation of one element.
        #we rely on candidates, lo, and hi
        modes=[None,None]
        # NOTE(review): modes[0] and modes[1] are read below as (year, fraction)
        # pairs and modes[-2][0] as a Ti value; state 9.5 also reads modes[3] and
        # modes[4] -- confirm the layout against resample_break.
        ystats, tstats, shiftstats,modes = resample_break(testdata[lo:hi], datayears[lo:hi],N=100,withmode=True)
        #ystats, tstats, shiftstats = resample_break(testdata[lo:hi], datayears[lo:hi],withmode=False)
        #now the tstats may mean it is no longer significant
        #or the ystats may indicate it's too close to the previous
        #or there may be another issue unthought about
        #how about the Ti0?

        # critical value for the span length, clamped to [minInterval, 30]
        crit30=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo])))
        #now the retested point may have moved
        if modes[-2][0] < crit30: #then it is not significant (was tstats[0])
          if trace:
            print >>tf,  "-- Candidate ", candidates[0], "(",modes[0][0],")(",modes[-2][0],") did not exceed ", crit30, "between ",datayears[lo], datayears[hi-1], "state[",state,"]" ,
          state= 6 #the state of a break was dropped because it was a problem
          if trace:
            print >>tf, "-> state[",state,"]"
        elif modes[0][0] -  (datayears[lo] -1) < minInterval:#JHR This was one year out, last break is datayears[lo] -1 (was ystat[0]); it probably moved and in any case is too close to last break
          if trace:
            print >>tf,  "<< Candidate ", candidates[0], "(",modes[0][0],")(",modes[-2][0],") too close to ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]",
          state= 5.5 #the state of two peaks close together - choose 1
          if trace:
            print >>tf, "-> state[",state,"]"
        # hold this thought
        #elif (ystats[0] - 2 * ystats[1] < min(testyr, datayears[lo]) or ystats[0] + 2 * ystats[1] > max(testyr, datayears[hi-1])):
        #now we consider a bunch of rules about the modality if we have done this
        elif modes[1] != None: #so there's more than one choice
          #if the first mode is at least 90% (Roger's verbal rule) then all is OK
          if modes[0][1] >= 0.9:
            state = 7 #the state of save the stats and then move on
          elif modes[0][1]+modes[1][1] < 0.7: #if less than 70% of the values are between two modes then it must be blurry
            if trace:
              print >>tf,  "~~ Candidate no strong modes ", candidates[0], "(",modes,") less than 70% ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]",
            state = 6  #the state of a break was dropped because it was a problem
            if trace:
              print >>tf, "-> state[",state,"]"
          elif modes[1][1] <= 0.2: #second mode carries at most 20%, so the first mode dominates
            state = 7 #the state of save the stats and then move on
          elif abs(modes[0][0] - modes[1][0]) <= minInterval /2.0:
            if trace:
              print >>tf,  "~+ Candidate accepted with two modes ", candidates[0], "(",modes,") < minInterval /2.0 ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]",
            state = 7
            if trace:
              print >>tf, "-> state[",state,"]"
          elif abs(modes[0][0] - modes[1][0]) > minInterval:
            #so for now we will override the candidate list and treat as two candidates _NOT ANY MORE
            if trace:
              print >>tf,  "~~ Candidate two strong modes - treat as both possible ", candidates[0], "(",modes,") > ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]",
            #candidates = [min(modes[0][0], modes[1][0])]#, max(modes[0][0], modes[1][0])]
            state = 9.5
            if trace:
              print >>tf, "-> state[",state,"]"
          else:
            if trace:
              print >>tf,  "~~ Candidate two strong but close modes ", candidates[0], "(",modes,") > minInterval /2.0 ", datayears[lo], "between ",datayears[lo], datayears[hi-1] , "state[",state,"]",
            state = 7 #This was a definite error - had been dropping these by setting state 6
            if trace:
              print >>tf, "-> state[",state,"]"
          #that's basically it for mode processing
          #however one more thing to check
        if state == 3: #final pass through: no rule above changed the state
          if abs(candidates[0] - ystats[0]) > 2.0 * ystats[1]:#resampling shows bad point JHR: 20150107 BAD TEST FIXME!
            if trace:
              print >>tf,  "<<! Candidate ", candidates[0], "(",ystats,")(",tstats,") too far from ", candidates[0], "between ",datayears[lo], datayears[hi-1] , ". Will replace, state[",state,"]",
            state= 9 #the state of a break was changed due to resampling
            if trace:
              print >>tf, "-> state[",state,"]"
          else:
            state = 7 #the state of save the stats and then move on
        if state == 7 and trace:
            if candidates[0] != ystats[0]:
              print >>tf, "Candidate has moved : modes are " ,modes
            print >>tf,  "=3! Candidate ", candidates[0], "(",ystats,")(",tstats,") OK between ",datayears[lo], datayears[hi-1] , " state[3] - state[7]"
      elif state == 4:
        #test the second candidate to see if it's going to be stable.
        #But we first need to know what the first candidate is.


        modes1=[None,None]
        lo1 = takeClosestIndex(datayears, candidates[0])+1
        hi1 = takeClosestIndex(datayears, candidates[1])+1
        #SO WHAT IS the new lo bound?
        #If this one is above critical use it, otherwise don't. Use the second candidate which by virtue of how it was generated is likely to persist
        ystats0, tstats0, shiftstats0,modes0 = resample_break(testdata[lo:hi1], datayears[lo:hi1],N=100,withmode=True)
        lo0 = takeClosestIndex(datayears, modes0[0][0])+1
        #so now we use the new low bound against the proposed upper bound
        ystats1, tstats1, shiftstats1,modes1 = resample_break(testdata[lo0:hi], datayears[lo0:hi],N=100,withmode=True)
        crit30a=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo0])))
        lowcandidate = candidates[0]
        if modes1[-2][0] >= crit30a: #then we save that one to use
          if trace:
            print >>tf,  "!-- Candidate ", candidates[0], " reset to ",modes0[0][0], " after resample gave ",modes0
          lowcandidate = modes0[0][0]
          lo1 = lo0
        else: #otherwise need to make a decision with the unchanged candidate
          ystats1, tstats1, shiftstats1,modes1 = resample_break(testdata[lo1:hi], datayears[lo1:hi],N=100,withmode=True)
          crit30a=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo1])))

        if modes1[-2][0] < crit30a: #then it is not significant (was tstats[0])
          if trace:
            print >>tf,  "!-- Candidate ", candidates[1], "(",modes1[0][0],")(",modes1[-2][0],") did not exceed ", crit30a, "between ",datayears[lo1], datayears[hi-1], " will not use: state[",state,"]" ,
        else:
          #insert
          if trace and candidates[1] in prevbreaks:
            print >>tf, "KNOWN DUPLICATE GOING INTO LIST", candidates[1], prevbreaks, "THIS IS OK"
          if trace:
            print >>tf,  "++ (new) Candidate ", candidates[0], "will be tested and ", candidates[1],"inserted", prevbreaks, "becomes",
          prevbreaks.insert(1, candidates[1]) #avoid relooping
          if trace:
            print >>tf, prevbreaks, "between ",datayears[lo], datayears[hi-1], "state[",state,"]",
          #lo=takeClosestIndex(datayears, newlist[-1])+1
          if not Terminating: hi = takeClosestIndex(datayears, prevbreaks[1])+1

        # continue with exactly one candidate through the single-candidate state
        candidates=[lowcandidate]
        state = 3
        if trace:
          print >>tf, "-> state[",state,"]"

#      elif state == 4.1: #Old code
#        if trace:
#          print >>tf,  "++ (new) Candidate ", candidates[0], "will be tested and ", candidates[1],"inserted", prevbreaks, "becomes", 
#        prevbreaks.insert(1, candidates[1]) #avoid relooping
#        if trace:
#          print >>tf, prevbreaks, "between ",datayears[lo], datayears[hi-1], "state[",state,"]",
#        #lo=takeClosestIndex(datayears, newlist[-1])+1
#        hi = takeClosestIndex(datayears, prevbreaks[1])+1
#        candidates=[candidates[0]]
#        state = 3
#        if trace:
#          print >>tf, "-> state[",state,"]"        
#
#      elif state == 4.2: #working on
#        if trace:
#          print >>tf,  "++ (new) Candidate ", candidates[0], "will be tested and ", candidates[1],"inserted", prevbreaks, "becomes", 
#        prevbreaks[0]= candidates[0]
#        prevbreaks[1]= candidates[1] #avoid relooping
#        if trace:
#          print >>tf, prevbreaks, "between ",datayears[lo], datayears[hi-1], "state[",state,"]",
#        #lo=takeClosestIndex(datayears, newlist[-1])+1
#        hi = takeClosestIndex(datayears, prevbreaks[1])+1
#        state = 0
#        if trace:
#          print >>tf, "-> state[",state,"]"        
#

      elif state == 5:
        #the state of a break was dropped by itself (therefore we don't move lo)
        #actually we have to move it a bit to stop a loop if we then get it back -
        #lo=takeClosestIndex(datayears, newlist[0])
        #do the dropping
        if Terminating:
          state = 10 # so avoid this processing
        else:
          prb=prevbreaks.pop(0)
          if len(prevbreaks) >1:
            hi = takeClosestIndex(datayears, prevbreaks[1])+1
            # NOTE(review): lo is used as a slice index afterwards, so this
            # relies on minInterval/2 yielding an integer -- confirm.
            lo += minInterval/2
            if trace:
              old = hi-1
              print >>tf,  "!! Hi bound, ",datayears[old], " to give ",datayears[lo], datayears[hi-1], "state[",state,"]",
            state = 0
          else:
            if trace:
              print >>tf,  "!! Hi bound, ",datayears[hi-1], " last between",datayears[lo], datayears[hi-1], "state[",state,"]",
            state = 8 #nothing to do after dropping an upper bound
          if trace:
            print >>tf, "-> state[",state,"]"

      elif state == 5.5:
        #the state of two peaks close together - choose 1 - if we choose the older one just drop this one
        #if we choose the later one delete reference to the prior one
        #similar code to state 3
        #but to prevent a loop we must prevent setting up the same interval again

        if len(statlist) == 0:
          state = 7 #accept it
          if trace: print >>tf,  "--!! Candidate ", candidates[0], "must be too close to start, but accept it, state[5.5] -> state[7]"
        else:
          if trace: print >>tf, "In state[5.5]"
          lo1=takeClosestIndex(datayears, newlist[-2])+1 #go back one step
          ystats, tstats, shiftstats,nmodes = resample_break(testdata[lo1:hi], datayears[lo1:hi], withmode=True)

          crit30=bivariate.critTi(pr, max(minInterval,min(30, 1+datayears[hi-1]-datayears[lo1])))
          #now the retested point may have moved
          if tstats[0] < crit30: #then it is not significant and this is a serious issue since one of these points previously was
            if trace:
              print >>tf,  "--!! Candidate ", candidates[0], "(",ystats,")(",tstats,") did not exceed ", crit30, "between ",datayears[lo], datayears[hi-1], "state[",state,"]" ,
            state= 5 #the state of a break was dropped because it was a problem
            if trace:
              print >>tf, "->!! state[",state,"]"
          else:
            if trace:

              print >>tf, "--!! go back to old break ",newlist[-2], "from ", datayears[lo], " candidate ",candidates[0], "new ", ystats[0], "between ",datayears[lo1], datayears[hi-1],"state[5.5]->",
            candidates=[round(ystats[0])]
            if ystats[0] -  datayears[lo] < minInterval:
              #we haven't moved very far, so set up a safety step
              safetystep = int(abs(ystats[0] -  datayears[lo]))
            if len(statlist) > 0: #if we have already saved a break
              try:
                newlist.pop(-1)
                statlist.pop(-1)
              #remove old dates
                state = 7 #the state of save the stats and then move on
              except: #we must not be able to pop, just delete it
                print >>tf, "no pop state [5.5] -> "
                state = 5
                pass
            else:
              state = 8 #nothing to do after dropping a candidate
            if trace: print >>tf, "state[",state,"]"
        #do the dropping
        #alternative strategy when the new point is too close to the previous, go back and choose the
        # highest Ti of the two


      elif state == 6: #the state of a break was dropped because it was a problem so currently just update the indices past the identified candidate break
        #relies on candidates[0]
        if Terminating:
          state = 10
        else:
          oldlo = lo
          lo = takeClosestIndex(datayears, candidates[0])+1
          if len(prevbreaks) <2: #then cannot go further, we are done. JHR 15/1/2014 SVN262
            if trace:
              print >>tf,  "$$ Lo bound ONLY updated to give ",datayears[lo], datayears[hi-1], "state[",state,"]",
            state = 0 #re-test with the moved lower bound

          else:
            #need to see if we can move the upper bound safely retaining the low
            if len(prevbreaks)>2:
              hi3 = takeClosestIndex(datayears, prevbreaks[2])+1
            else:
              hi3 = takeClosestIndex(datayears, prevbreaks[1])+1
            ystats3, tstats3, shiftstats3,modes3 = resample_break(testdata[oldlo:hi3], datayears[oldlo:hi3],N=100,withmode=True)
            if modes3[1] == None or modes3[0][1]+modes3[1][1] >= 0.7:
              hi = hi3
              lo = oldlo
              state = 0 #no more to do but collect another point
              prb=prevbreaks.pop(0)
              if len(prevbreaks) >1 and prb == prevbreaks[0]: #JHR 17mar2015 was looping here.
                 prb=prevbreaks.pop(0) #remove the second instance if present to prevent a loop

              if trace:
                print >>tf,  "$$ Moved high bound, NOW ",datayears[oldlo] , datayears[hi-1], "dropped ", prb, "to give", prevbreaks, "state[",state,"]",

            #if this now violates the length rule update hi as well
            elif datayears[hi-1] - datayears[lo] < minInterval:
              if trace:
                print >>tf,  "$$ Lo bound, ",datayears[oldlo],"too close to Hi to use, cannot apply ",datayears[lo], datayears[hi-1], "state[",state,"]",
              state = 5#the state of a break was dropped by itself (therefore we don't move lo)
            else:
              if trace:
                print >>tf,  "$$ Lo bound, updated to give ",datayears[lo], datayears[hi-1], "state[",state,"]",
              state = 8 #nothing to do after dropping an upper bound
          if trace:
            print >>tf, "-> state[",state,"]"

      elif state == 7:#the state of save the stats and then move on
        #we rely on candidates, lo, and hi, ystats, tstats, shiftstats
        if ystats == None:
          print "Trouble - this is a debug line "
        # on the final pass a break closer than minInterval to the last bound is not saved
        if not (Terminating and abs(currentbreaks[-1] -ystats[0]) <  minInterval):
          statlist.append((ystats, tstats, shiftstats))
          newlist.append(int(round(ystats[0])))
        #newlist.append(candidates[0]) #Save the actual candidate
        if not Terminating:
          lo = takeClosestIndex(datayears, newlist[-1])+1+safetystep
          if lo > len(datayears): lo -= safetystep
          safetystep =0
          prb=prevbreaks.pop(0)
          if len(prevbreaks) >1 and prb == prevbreaks[0]:
            if trace:
              print >>tf, "Investigate : just popped", prb, prevbreaks, "state[7]"
            prb=prevbreaks.pop(0)  #JHR remove the duplicate as well. 10/2/2015

          if len(prevbreaks) >1:
            hi = takeClosestIndex(datayears, prevbreaks[1])+1
            state = 0 #round again
          else:
            state = 10 #nothing more to do
        else:
          state = 10
        # NOTE(review): statlist[-1] raises IndexError here when nothing has been
        # saved yet and trace is on -- confirm whether that can occur.
        if trace:
          print >>tf,  "\\\\", "Saved ",newlist[-1],statlist[-1], "state[7] -> state[",state,"]"

      elif state == 8: #nothing to do after dropping an upper bound
        if trace:
          print >>tf, "-- Update upper bound", datayears[hi-1], "to ",
        try:
          prb=prevbreaks.pop(0)
        except:
          if trace:
            print >>tf, "empty list",
          pass
        if not Terminating and len(prevbreaks) >1:
          hi = takeClosestIndex(datayears, prevbreaks[1])+1
          if trace:
            print >>tf,datayears[hi-1], "state[",state,"]",
          state = 0 #round again
        else:
          state = 10 #nothing more to do
        if trace:
          print >>tf, "->","state[",state,"]"

      elif state== 9: #the state of a break was changed due to resampling
        if trace:
          print >>tf,  "&& candidate, ",candidates[0], " no longer updated to give ",
        #candidates[0] = round(ystats[0])
        if trace:
          print >>tf,round(ystats[0]),"state[",state,"]",
        state = 7
        if trace:
          print >>tf, "-> state[",state,"]"

      elif state== 9.5: #the state of two strong modes record and exit
        if trace:
          print >>tf,  "&&-> candidate, ",candidates[0], "updated to give ",
        # take the first mode if its entry in modes[3] reaches the critical value,
        # otherwise the second mode with the values from modes[4]
        if modes[3][0] >= crit30:
          ystats=(modes[0][0],0.0)
          tstats=(modes[3][0], 0.0)
          shiftstats=(modes[3][1],0.0)
          candidates = [modes[0][0]]
          if trace:
            print >>tf, " FIRST MODE ",
        else:
          ystats=(modes[1][0],0.0)
          tstats=(modes[4][0], 0.0)
          shiftstats=(modes[4][1],0.0)
          candidates = [modes[1][0]]
          if trace:
            print >>tf, " SECOND MODE ",

        if trace:
          print >>tf,candidates[0],"state[",state,"]",
        state = 7
        if trace:
          print >>tf, "-> state[",state,"]"

      elif state == 10:
        if Terminating:
          newlist.append(currentbreaks[-1])
          state = -1
        else:
          Terminating = True
          #lo = takeClosestIndex(datayears, newlist[-1])+1+safetystep
          #print "State 10 ",lo, hi
          state = 0 #for last time
      # NOTE(review): this branch looks unreachable, since the loop condition
      # already requires state >= 0 at the top of every iteration.
      elif state < 0:
        print state, newlist
    return newlist, statlist
Beispiel #3
0
def convergentBreaks_Inner(testdata, controldata, datayears, aicControl, model, trace=True, shallow=False, keepFirst=False):
  '''
  This code attempst to iteratively test all 
  '''
  #The issue with convergence is that whether a breakpoint is admitted to the yearly breaks is determined 
  #initial analysis over full data
  if trace:
      tf=open(TraceFile,"a")

  rt0=recursetest.recurse(testdata, controldata, datayears, model, smooth=False, trim = 0, pr=0.01, anom=False)
  
  oyears = rt0.breakyears() #initial set of breakpoints 
  byears = [datayears[0]]
  byears.extend([k for k in np.sort(oyears.keys())])
  #print "BYEARS", byears
  #byears=np.insert(byears, 0, datayears[0])
  
  if trace:
    print >>tf,model, "trace=",trace, "shallow=",shallow
    for i in range(len(testdata)):
      print >>tf,i, testdata[i], controldata[i], datayears[i], aicControl[i]
  byears.append(datayears[-1]) 
  if trace: print >>tf,"BYEARS", byears
  initialBreaks= copy.copy(byears)
  low=0
  crit30 = bivariate.critTi(0.01, 30)
  print "initially", np.sort(oyears.keys()), crit30
  testedlist = []
  statlist = []
  newbreaks=[datayears[0]] #the first list start is preserved
  fails = 0
  while len(byears) > 2:
    popped=byears.pop(0)
    testedlist.append(popped)
    lo=np.argwhere(datayears==newbreaks[-1])[0][0]
    hi=np.argwhere(datayears==byears[1])[0][0]+1
    testyr = byears[0]    
    print "try", datayears[lo], datayears[hi-1],
    rt1=recursetest.recurse(testdata[lo:hi], controldata[lo:hi], datayears[lo:hi], model, smooth=False, trim = 0, pr=0.01, anom=False)
    print rt1.breakyears().keys()
    nlist=rt1.breakyears().keys()
    nlist.sort()
    if fails > 0:
      print nlist, "shortened to ",nlist[fails:]
    nlist = nlist[fails:]
    lennlist=len(nlist)
    nlist.extend(byears[1:])
    
    byears=nlist[:]
    if lennlist == 0:
      print popped, " gone, revised list empty"
    else:
      if lennlist > 1: #then lo and hi need to be computed
        hi=np.argwhere(datayears==byears[1])[0][0]+1       
        testyr =byears[0] 
      ystats, tstats = resample_break(testdata[lo:hi], datayears[lo:hi])
      print popped, datayears[hi-1],ystats, tstats, byears[0],
      if tstats[0] + 2 * tstats[1] < crit30:
        print " **"
        if lennlist > 0:
          fails += 1
          
      else:
        if crit30 > tstats[0]:
          print " * "
        else:
          print "   ",
        if abs(testyr - ystats[0] ) -0.5 <= 2 * ystats[1]:
          print "YOK",
          #KeepOldDropNew
          newbreaks.append(round(ystats[0],0))
          statlist.append((ystats, tstats))
          fails = 0
        else:
          print " Y* ",
          #So does the spread of possible break years actually exceed the possible bounds - this is a sign of instability or trend, or badness, or we may already have this point eqarlier
          if ystats[0] - 2 * ystats[1] < min(testyr, datayears[lo]) or ystats[0] + 2 * ystats[1] > max(testyr, datayears[hi-1]):
            print" drop", testyr,"likehotpotato"
            if lennlist > 0: fails += 1
          else:
            byears[0] = round(ystats[0],0)
            newbreaks.append(byears[0])
            statlist.append((ystats, tstats))
            print "revised",testyr,"to",byears[0]
            fails = 0
  newbreaks.append(datayears[-1])
  
  print "\nReturning",initialBreaks, "->", newbreaks, statlist
  return initialBreaks, newbreaks, statlist