Example #1
0
 def test_ug_6(self):
   """A new location flagged is_max should replace the current max guess."""
   # Bracketing guesses: lo is below the desired range, hi is above it.
   # (Renamed from min/max to avoid shadowing the builtins.)
   lo = LogLocation(0, datetime(datetime.now().year,  2, 13, 18, 31, 30),
                    LogLocation.TOO_LOW,
                    LogLocation.TOO_LOW)
   hi = LogLocation(3770000, datetime(datetime.now().year,  2, 14, 7, 7, 39),
                    LogLocation.TOO_HIGH,
                    LogLocation.TOO_HIGH)
   guesses = [lo, hi]
   # A tighter upper bound, explicitly marked as the max edge.
   new = LogLocation(1234, datetime(datetime.now().year,  2, 13, 19, 0, 0),
                     LogLocation.TOO_HIGH,
                     LogLocation.TOO_HIGH)
   new.set_is_max(True)
   answer = [lo, new]
   # update, is_max: the flagged max must displace the old upper guess.
   tgrep.update_guess(new, guesses)
   # assertEqual, not the deprecated assertEquals alias.
   self.assertEqual(answer, guesses)
Example #2
0
 def test_ug_8(self):
   """Disabled: a plain new location must not displace a guess whose
   is_max flag is already set (behavior not yet implemented in
   update_guess, hence the early return)."""
   #//! impl in update_guess
   return
   # --- dead code below: kept as the intended test body ---
   # (Renamed from min/max to avoid shadowing the builtins.)
   lo = LogLocation(0, datetime(datetime.now().year,  2, 13, 18, 31, 30),
                    LogLocation.TOO_HIGH,
                    LogLocation.TOO_LOW)
   hi = LogLocation(3770000, datetime(datetime.now().year,  2, 14, 7, 7, 39),
                    LogLocation.TOO_HIGH,
                    LogLocation.TOO_HIGH)
   hi.set_is_max(True)
   guesses = [lo, hi]
   new = LogLocation(1234, datetime(datetime.now().year,  2, 13, 19, 0, 0),
                     LogLocation.TOO_HIGH,
                     LogLocation.TOO_HIGH)
   answer = [lo, hi]
   # ignore, is_max already set for the upper guess
   tgrep.update_guess(new, guesses)
   # assertEqual, not the deprecated assertEquals alias.
   self.assertEqual(answer, guesses)
Example #3
0
File: tgrep-m.py  Project: cole-brown/tgrep
def optimistic_edge_search(log, guess, times, results):
    """Sweep one chunk of the log line by line to find an exact edge.

    Used when a bisection guess is already close to the desired boundary:
    read a single chunk and check every timestamp in it.

    Args:
      log:     open file-like log handle; seek()/read() are called on it.
      guess:   LogLocation near the edge; its seek location and min/max
               relation decide which direction the chunk covers.
      times:   pair of datetimes (desired min, desired max).
      results: output list; the best LogLocation found is appended to it.

    Side effects: advances the global `stats` seek/read counters.
    """
    global stats
    seek_loc = guess._seek_loc
    if guess.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH:
        # Guess is above the range: the max edge is behind us, so back up
        # one full chunk and read forward up to the guess location.
        seek_loc -= EDGE_SWEEP_CHUNK_SIZE
        seek_loc = 0 if seek_loc < 0 else seek_loc  # clamp at file start
        log.seek(seek_loc)
        stats['seeks'] += 1
    else:
        # Guess is below the range: the min edge is ahead, read from here.
        log.seek(seek_loc)
        stats['seeks'] += 1
    chunk = log.read(EDGE_SWEEP_CHUNK_SIZE)
    stats['reads'] += 1

    prev_minmax = guess.get_minmax()
    result = LogLocation(
        0, datetime.min, LogLocation.TOO_LOW,
        LogLocation.TOO_HIGH)  # an invalid result to start with
    chunk_loc = 0
    end_loc = chunk.rfind('\n')  # ignore any trailing partial line
    while chunk_loc < end_loc:
        try:
            # Skip to just past the next newline so the slice below starts
            # at the beginning of a full line (the chunk may begin mid-line).
            nl_index = chunk[chunk_loc:].find('\n')
            if nl_index == -1:
                break  # Can't find a newline; we're done.
            nl_index += 1  # get past the newline

            # find the first bit of the line, e.g. "Feb 14 05:52:12 web0"
            # split it on the whitespace, e.g. ["Feb", "14", "05:52:12", "web0"]
            # join the first three back together again with ' ' as the separator
            # parse the thing!
            # //! need to research a better (faster?) way to do this
            time = parse_time(' '.join(
                chunk[chunk_loc + nl_index:chunk_loc + nl_index +
                      20].split()[:3]))  #//! magic 20

            chunk_loc += nl_index
            # NOTE(review): chunk_loc is advanced by nl_index again in the
            # finally below, so after a successful parse it lands mid-line;
            # the find('\n') above re-synchronizes, but a short line that
            # follows a much longer one could be skipped — verify intended.

            result._seek_loc = seek_loc + chunk_loc
            result._timestamp = time
            # compare to desired to see if it's a better max
            if time > times[1]:
                result._relation_to_desired_min = LogLocation.TOO_HIGH
                result._relation_to_desired_max = LogLocation.TOO_HIGH
                # check to see if it's the edge
                if prev_minmax[1] != LogLocation.TOO_HIGH:
                    # We passed out of range. This loc is where we want to /stop/ reading. Save it!
                    result.set_is_max(True)
                break  # Can short-circuit if find a max, since we're reading buff beginning-to-end.
            elif time == times[1]:
                # do nothing for now about data, may optimize to save off data later.
                result._relation_to_desired_max = LogLocation.MATCH
            else:  # time < times[1]
                result._relation_to_desired_max = LogLocation.TOO_LOW

            # and now the min
            if time < times[0]:
                result._relation_to_desired_min = LogLocation.TOO_LOW
            elif time == times[0]:
                # do nothing for now about data, may optimize to save off data later.
                result._relation_to_desired_min = LogLocation.MATCH
            else:  # time > times[0]
                result._relation_to_desired_min = LogLocation.TOO_HIGH

            # see if we got the min edge (max was checked above)
            p = prev_minmax[0]
            r = result._relation_to_desired_min
            if (prev_minmax == LogLocation.OUT_OF_RANGE_LOW) and (
                    result.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH):
                pass  # //! No matches! Tell the dude and quit!
            elif (p == LogLocation.TOO_LOW) and (r != LogLocation.TOO_LOW):
                # We passed into our range via min. This is one.
                result.set_is_min(True)
                break

            prev_minmax = result.get_minmax()
        except ValueError:  # not a time string found
            print "time parse error"
            pass  # we're ok with occasional non-time string lines. Might start the read in the middle of a line, for example.
        finally:
            chunk_loc += nl_index

    results.append(result)
Example #4
0
File: tgrep-m.py  Project: spydez/tgrep
def optimistic_edge_search(log, guess, times, results):
  """Sweep one chunk of the log line by line to find an exact edge.

  Used when a bisection guess is already close to the desired boundary:
  read a single chunk and check every timestamp in it.

  Args:
    log:     open file-like log handle; seek()/read() are called on it.
    guess:   LogLocation near the edge; its seek location and min/max
             relation decide which direction the chunk covers.
    times:   pair of datetimes (desired min, desired max).
    results: output list; the best LogLocation found is appended to it.

  Side effects: advances the global `stats` seek/read counters.
  """
  global stats
  seek_loc = guess._seek_loc
  if guess.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH:
    # Guess is above the range: the max edge is behind us, so back up
    # one full chunk and read forward up to the guess location.
    seek_loc -= EDGE_SWEEP_CHUNK_SIZE
    seek_loc = 0 if seek_loc < 0 else seek_loc  # clamp at file start
    log.seek(seek_loc)
    stats['seeks'] += 1
  else:
    # Guess is below the range: the min edge is ahead, read from here.
    log.seek(seek_loc)
    stats['seeks'] += 1
  chunk = log.read(EDGE_SWEEP_CHUNK_SIZE)
  stats['reads'] += 1

  prev_minmax = guess.get_minmax()
  result = LogLocation(0, datetime.min,
                       LogLocation.TOO_LOW,
                       LogLocation.TOO_HIGH) # an invalid result to start with
  chunk_loc = 0
  end_loc   = chunk.rfind('\n')  # ignore any trailing partial line
  while chunk_loc < end_loc:
    try:
      # Skip to just past the next newline so the slice below starts at
      # the beginning of a full line (the chunk may begin mid-line).
      nl_index = chunk[chunk_loc:].find('\n')
      if nl_index == -1:
        break # Can't find a newline; we're done.
      nl_index += 1 # get past the newline
      
      # find the first bit of the line, e.g. "Feb 14 05:52:12 web0"
      # split it on the whitespace, e.g. ["Feb", "14", "05:52:12", "web0"]
      # join the first three back together again with ' ' as the separator
      # parse the thing!
      # //! need to research a better (faster?) way to do this
      time = parse_time(' '.join(chunk[chunk_loc + nl_index : chunk_loc + nl_index + 20].split()[:3])) #//! magic 20
    
      chunk_loc += nl_index
      # NOTE(review): chunk_loc is advanced by nl_index again in the
      # finally below, so after a successful parse it lands mid-line; the
      # find('\n') above re-synchronizes, but a short line that follows a
      # much longer one could be skipped — verify intended.

      result._seek_loc  = seek_loc + chunk_loc
      result._timestamp = time
      # compare to desired to see if it's a better max
      if time > times[1]:
        result._relation_to_desired_min = LogLocation.TOO_HIGH
        result._relation_to_desired_max = LogLocation.TOO_HIGH
        # check to see if it's the edge
        if prev_minmax[1] != LogLocation.TOO_HIGH:
          # We passed out of range. This loc is where we want to /stop/ reading. Save it!
          result.set_is_max(True)
        break  # Can short-circuit if find a max, since we're reading buff beginning-to-end.
      elif time == times[1]:
        # do nothing for now about data, may optimize to save off data later.
        result._relation_to_desired_max = LogLocation.MATCH
      else: # time < times[1]
        result._relation_to_desired_max = LogLocation.TOO_LOW

      # and now the min
      if time < times[0]:
        result._relation_to_desired_min = LogLocation.TOO_LOW
      elif time == times[0]:
        # do nothing for now about data, may optimize to save off data later.
        result._relation_to_desired_min = LogLocation.MATCH
      else: # time > times[0]
        result._relation_to_desired_min = LogLocation.TOO_HIGH

      # see if we got the min edge (max was checked above)
      p = prev_minmax[0]
      r = result._relation_to_desired_min
      if (prev_minmax == LogLocation.OUT_OF_RANGE_LOW) and (result.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH):
        pass # //! No matches! Tell the dude and quit!
      elif (p == LogLocation.TOO_LOW) and (r != LogLocation.TOO_LOW):
        # We passed into our range via min. This is one.
        result.set_is_min(True)
        break

      prev_minmax = result.get_minmax()
    except ValueError: # not a time string found
      print "time parse error"
      pass # we're ok with occasional non-time string lines. Might start the read in the middle of a line, for example.
    finally:
      chunk_loc += nl_index

  results.append(result)