예제 #1
0
 def test_ug_3(self):
   """update_guess leaves the bounds alone for a guess beyond the upper bound."""
   year = datetime.now().year  # read once so every fixture uses the same year
   lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                       LogLocation.TOO_LOW,
                       LogLocation.TOO_LOW)
   upper = LogLocation(3770000, datetime(year, 2, 14, 7, 7, 39),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_HIGH)
   guesses = [lower, upper]
   candidate = LogLocation(3779999999, datetime(year, 2, 16, 22, 26, 26),
                           LogLocation.TOO_HIGH,
                           LogLocation.TOO_HIGH)
   expected = [lower, upper]
   # no update: the candidate is even higher than the current upper bound
   tgrep.update_guess(candidate, guesses)
   self.assertEqual(expected, guesses)
예제 #2
0
 def test_ug_4(self):
   """update_guess leaves the bounds alone for a guess that is already in range."""
   year = datetime.now().year  # read once so every fixture uses the same year
   lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                       LogLocation.TOO_LOW,
                       LogLocation.TOO_LOW)
   upper = LogLocation(3770000, datetime(year, 2, 14, 7, 7, 39),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_HIGH)
   guesses = [lower, upper]
   # above the desired min but below the desired max, i.e. inside the range
   candidate = LogLocation(12345, datetime(year, 2, 13, 19, 0, 0),
                           LogLocation.TOO_HIGH,
                           LogLocation.TOO_LOW)
   expected = [lower, upper]
   # no update, in range
   tgrep.update_guess(candidate, guesses)
   self.assertEqual(expected, guesses)
예제 #3
0
 def test_ug_2(self):
   """update_guess replaces the upper bound with a closer too-high guess."""
   year = datetime.now().year  # read once so every fixture uses the same year
   lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                       LogLocation.TOO_LOW,
                       LogLocation.TOO_LOW)
   upper = LogLocation(3770000, datetime(year, 2, 14, 7, 7, 39),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_HIGH)
   guesses = [lower, upper]
   candidate = LogLocation(2220000, datetime(year, 2, 14, 6, 6, 6),
                           LogLocation.TOO_HIGH,
                           LogLocation.TOO_HIGH)
   expected = [lower, candidate]
   # update high: the candidate sits below the old upper bound
   tgrep.update_guess(candidate, guesses)
   self.assertEqual(expected, guesses)
예제 #4
0
파일: tgrep-m.py 프로젝트: cole-brown/tgrep
def pessismistic_binary_search(log, seek_loc, times, results, arr):
    """Reads only a little and checks only the first timestamp. Better when it's way off base.

    Args:
        log: open log file; it is repositioned with seek() and read from.
        seek_loc: offset to seek to before reading.
        times: pair whose items 0 and 1 are the desired min and max times.
        results: queue-like object; exactly one item is put() on it, either a
            LogLocation or None when the chunk read holds no newline.
        arr: indexable counters; item 0 is bumped after the seek and item 1
            after the read. When it offers get_lock() (as a synchronized
            multiprocessing Array does) the lock is held for each bump.
    """
    # NOTE(review): if `log` is one file object shared by several worker
    # processes they may also share the OS-level file offset -- confirm that a
    # seek() in one worker cannot land between another worker's seek() and read().
    log.seek(seek_loc)
    _bump_shared_counter(arr, 0)
    chunk = log.read(MORE_THAN_ONE_LINE)
    _bump_shared_counter(arr, 1)

    # find the nearest newline so we can find the timestamp
    newline_at = chunk.find("\n")
    if newline_at == -1:
        results.put(None)
        return
        # //! better error case?
    line_start = newline_at + 1  # get past the newline

    # find the first bit of the line, e.g. "Feb 14 05:52:12 web0"
    # split it on the whitespace, e.g. ["Feb", "14", "05:52:12", "web0"]
    # join the first three back together again with ' ' as the separator
    # parse the thing!
    # //! need to research a better (faster?) way to do this
    stamp = parse_time(' '.join(
        chunk[line_start:line_start + 20].split()[:3]))  #//! magic 20

    # The location recorded is the offset we seeked to, not the line start.
    results.put(LogLocation(
        seek_loc,
        stamp,
        logloc.time_cmp(stamp, times[0]),  # how it compares, min
        logloc.time_cmp(stamp, times[1])))  # how it compares, max


def _bump_shared_counter(counters, index):
    """Add one to counters[index], holding the container's lock when it has one."""
    get_lock = getattr(counters, "get_lock", None)
    if get_lock is None:
        # Plain sequence: there is nothing to synchronize on.
        counters[index] += 1
        return
    # `+=` is a separate read and write, so concurrent workers could lose
    # updates unless the whole step happens under the lock.
    with get_lock():
        counters[index] += 1
예제 #5
0
 def test_bsg(self):
   """binary_search_guess returns the midpoint of the two bounds' locations."""
   year = datetime.now().year  # read once so every fixture uses the same year
   lower = LogLocation(0, datetime(year, 2, 13, 23, 33, 11),
                       LogLocation.MATCH,
                       LogLocation.MATCH)
   upper = LogLocation(10, datetime(year, 2, 13, 23, 33, 15),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_HIGH)
   bounds = [lower, upper]
   unused = None # ignored anyways
   self.assertEqual(  5, tgrep.binary_search_guess(bounds, unused))
   bounds[1].set_loc(100)
   self.assertEqual( 50, tgrep.binary_search_guess(bounds, unused))
   bounds[1].set_loc(1000)
   self.assertEqual(500, tgrep.binary_search_guess(bounds, unused))
   # lower location above the upper one, and an odd sum
   bounds[0].set_loc(337)
   bounds[1].set_loc(222)
   # exact midpoint is 279.5; the expected value is rounded down, not to nearest
   self.assertEqual(279, tgrep.binary_search_guess(bounds, unused))
   bounds[0].set_loc(0)
   bounds[1].set_loc(993837478)
   self.assertEqual(496918739, tgrep.binary_search_guess(bounds, unused))
예제 #6
0
  def test_prn_m_2(self):
    """print_log_lines output when both bounds sit at the same location."""
    year = datetime.now().year  # read once so both fixtures use the same year

    # zero bytes
    lower = LogLocation(1507623, datetime(year, 2, 13, 23, 33, 3),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_LOW)
    lower.set_is_min(True)
    upper = LogLocation(1507623, datetime(year, 2, 13, 23, 33, 3),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_HIGH) # same log, basically
    # The upper bound is the max edge; it used to be flagged with set_is_min.
    upper.set_is_max(True)
    bounds = [lower, upper]
    tgrep.print_log_lines(self.mlog_file, bounds, self.log_entries)
    self.assertEqual(expected_log2, self.log_entries.getvalue())
예제 #7
0
  def test_prn_m_0(self):
    """print_log_lines output for a request that matches one log line."""
    year = datetime.now().year  # read once so both fixtures use the same year

    # Feb 13 23:33:11 (one log line)
    # [[1508000, 2011-02-13 23:33:11, 0, 0, True, False], [1508377, 2011-02-13 23:33:15, 1, 1, False, True]]
    lower = LogLocation(1508000, datetime(year, 2, 13, 23, 33, 11),
                        LogLocation.MATCH,
                        LogLocation.MATCH)
    lower.set_is_min(True)
    upper = LogLocation(1508377, datetime(year, 2, 13, 23, 33, 15),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_HIGH)
    # The dump above shows the upper bound as (is_min, is_max) = (False, True);
    # it used to be flagged with set_is_min.
    upper.set_is_max(True)
    bounds = [lower, upper]
    tgrep.print_log_lines(self.mlog_file, bounds, self.log_entries)
    self.assertEqual(expected_log0, self.log_entries.getvalue())
예제 #8
0
  def test_prn_m_6(self):
    """print_log_lines output for a chunk at the start of the file, no exact match."""
    year = datetime.now().year  # read once so both fixtures use the same year

    # Feb 13 18:30:30 (Start of File, chunk, no exact matches)
    # [[0, 2011-02-13 18:31:30, 1, -1, True, False], [2639, 2011-02-13 18:32:08, 1, 1, False, True]]
    lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_LOW)
    lower.set_is_min(True)
    upper = LogLocation(2639, datetime(year, 2, 13, 18, 32, 8),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_HIGH)
    # The dump above shows the upper bound as (is_min, is_max) = (False, True);
    # it used to be flagged with set_is_min.
    upper.set_is_max(True)
    bounds = [lower, upper]
    tgrep.print_log_lines(self.mlog_file, bounds, self.log_entries)
    self.assertEqual(expected_log6, self.log_entries.getvalue())
예제 #9
0
  def test_prn_m_5(self):
    """print_log_lines output for exactly one line at the start of the file."""
    year = datetime.now().year  # read once so both fixtures use the same year

    # Feb 13 18:31:30 (Start of File, exactly one line)
    # [[0, 2011-02-13 18:31:30, 0, 0, True, False], [377, 2011-02-13 18:31:36, 1, 1, False, True]]
    # The dump above lists the lower bound's relations as 0, 0; the fixture
    # used TOO_LOW, TOO_LOW, which disagrees with it.
    # NOTE(review): 0 is taken to mean MATCH, as in the sibling one-line test -- confirm.
    lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                        LogLocation.MATCH,
                        LogLocation.MATCH)
    lower.set_is_min(True)
    upper = LogLocation(377, datetime(year, 2, 13, 18, 31, 36),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_HIGH)
    # The dump above shows the upper bound as (is_min, is_max) = (False, True);
    # it used to be flagged with set_is_min.
    upper.set_is_max(True)
    bounds = [lower, upper]
    tgrep.print_log_lines(self.mlog_file, bounds, self.log_entries)
    self.assertEqual(expected_log5, self.log_entries.getvalue())
예제 #10
0
  def test_prn_m_4(self):
    """print_log_lines output for a chunk that runs to the end of the file."""
    year = datetime.now().year  # read once so both fixtures use the same year

    # Feb 14 07:07:39 (End of File, chunk)
    # [[3765853, 2011-02-14 07:07:01, 1, -1, True, False], [3770000, 2011-02-14 07:07:39, 1, 1, False, True]]
    lower = LogLocation(3765853, datetime(year, 2, 14, 7, 7, 1),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_LOW)
    lower.set_is_min(True)
    upper = LogLocation(3770000, datetime(year, 2, 14, 7, 7, 39),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_HIGH)
    # The dump above shows the upper bound as (is_min, is_max) = (False, True);
    # it used to be flagged with set_is_min.
    upper.set_is_max(True)
    bounds = [lower, upper]
    tgrep.print_log_lines(self.mlog_file, bounds, self.log_entries)
    self.assertEqual(expected_log4, self.log_entries.getvalue())
예제 #11
0
  def test_prn_m_1(self):
    """print_log_lines output for a request covering a whole minute."""
    year = datetime.now().year  # read once so both fixtures use the same year

    # Feb 13 23:33 (whole minute)
    # [[1507623, 2011-02-13 23:33:03, 1, -1, True, False], [1512524, 2011-02-13 23:34:03, 1, 1, False, True]]
    lower = LogLocation(1507623, datetime(year, 2, 13, 23, 33, 3),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_LOW)
    lower.set_is_min(True)
    upper = LogLocation(1512524, datetime(year, 2, 13, 23, 34, 3),
                        LogLocation.TOO_HIGH,
                        LogLocation.TOO_HIGH)
    # The dump above shows the upper bound as (is_min, is_max) = (False, True);
    # it used to be flagged with set_is_min.
    upper.set_is_max(True)
    bounds = [lower, upper]
    tgrep.print_log_lines(self.mlog_file, bounds, self.log_entries)
    self.assertEqual(expected_log1, self.log_entries.getvalue())
예제 #12
0
 def test_ug_6(self):
   """update_guess adopts a too-high guess that is flagged as the max edge."""
   year = datetime.now().year  # read once so every fixture uses the same year
   lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                       LogLocation.TOO_LOW,
                       LogLocation.TOO_LOW)
   upper = LogLocation(3770000, datetime(year, 2, 14, 7, 7, 39),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_HIGH)
   guesses = [lower, upper]
   candidate = LogLocation(1234, datetime(year, 2, 13, 19, 0, 0),
                           LogLocation.TOO_HIGH,
                           LogLocation.TOO_HIGH)
   candidate.set_is_max(True)
   expected = [lower, candidate]
   # update, is_max
   tgrep.update_guess(candidate, guesses)
   self.assertEqual(expected, guesses)
예제 #13
0
 def test_ug_8(self):
   """Disabled: update_guess should ignore a guess once the upper bound is flagged is_max."""
   #//! impl in update_guess
   # Bail out before touching update_guess; everything below is the intended
   # test body, kept for when that case is implemented. Until then this test
   # reports as passing without checking anything.
   return
   year = datetime.now().year  # read once so every fixture uses the same year
   lower = LogLocation(0, datetime(year, 2, 13, 18, 31, 30),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_LOW)
   upper = LogLocation(3770000, datetime(year, 2, 14, 7, 7, 39),
                       LogLocation.TOO_HIGH,
                       LogLocation.TOO_HIGH)
   upper.set_is_max(True)
   guesses = [lower, upper]
   candidate = LogLocation(1234, datetime(year, 2, 13, 19, 0, 0),
                           LogLocation.TOO_HIGH,
                           LogLocation.TOO_HIGH)
   expected = [lower, upper]
   # ignore, is_max already set for max
   tgrep.update_guess(candidate, guesses)
   self.assertEqual(expected, guesses)
예제 #14
0
파일: tgrep-m.py 프로젝트: cole-brown/tgrep
def optimistic_edge_search(log, guess, times, results):
    """Reads a chunk and checks whole thing for timestamps. Better when it's really close.

    Args:
        log: open log file; it is repositioned with seek() and read from.
        guess: LogLocation to start from. When its get_minmax() equals
            LogLocation.OUT_OF_RANGE_HIGH the chunk that ends at the guess is
            read, otherwise the chunk that starts at it.
        times: pair whose items 0 and 1 are the desired min and max times.
        results: object with append(); exactly one LogLocation is appended.

    The appended LogLocation describes the last line whose timestamp parsed,
    flagged with set_is_min/set_is_max when that line is an edge of the desired
    range. If no timestamp parsed, it is the placeholder built before the loop.
    Also bumps stats['seeks'] and stats['reads'].
    """
    stamp_span = 20  # //! magic 20: characters sliced off a line start to hold the timestamp

    seek_loc = guess._seek_loc
    if guess.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH:
        # we're looking for the max and we're above it, so read from a chunk away to here.
        seek_loc -= EDGE_SWEEP_CHUNK_SIZE
        if seek_loc < 0:
            seek_loc = 0
    # otherwise we're looking for the min and we're below it, so read starting here.
    log.seek(seek_loc)
    stats['seeks'] += 1
    chunk = log.read(EDGE_SWEEP_CHUNK_SIZE)
    stats['reads'] += 1

    prev_minmax = guess.get_minmax()
    result = LogLocation(
        0, datetime.min, LogLocation.TOO_LOW,
        LogLocation.TOO_HIGH)  # an invalid result to start with
    chunk_loc = 0
    end_loc = chunk.rfind('\n')
    while chunk_loc < end_loc:
        # find the nearest newline so we can find the timestamp; searching in
        # place avoids copying the rest of the chunk on every pass
        newline_at = chunk.find('\n', chunk_loc)
        if newline_at == -1:
            break  # Can't find a newline; we're done.

        # Step to the start of the next line exactly once. The cursor used to
        # be advanced a second time after a successful parse, which could jump
        # over whole lines and miss an edge.
        chunk_loc = newline_at + 1

        # find the first bit of the line, e.g. "Feb 14 05:52:12 web0"
        # split it on the whitespace, e.g. ["Feb", "14", "05:52:12", "web0"]
        # join the first three back together again with ' ' as the separator
        # parse the thing!
        # //! need to research a better (faster?) way to do this
        try:
            time = parse_time(' '.join(
                chunk[chunk_loc:chunk_loc + stamp_span].split()[:3]))
        except ValueError:  # not a time string found
            # we're ok with occasional non-time string lines. Might start the
            # read in the middle of a line, for example.
            print("time parse error")
            continue

        result._seek_loc = seek_loc + chunk_loc
        result._timestamp = time

        # compare to desired to see if it's a better max
        if time > times[1]:
            result._relation_to_desired_min = LogLocation.TOO_HIGH
            result._relation_to_desired_max = LogLocation.TOO_HIGH
            # check to see if it's the edge
            if prev_minmax[1] != LogLocation.TOO_HIGH:
                # We passed out of range. This loc is where we want to /stop/ reading. Save it!
                result.set_is_max(True)
            break  # Can short-circuit if find a max, since we're reading buff beginning-to-end.
        elif time == times[1]:
            # do nothing for now about data, may optimize to save off data later.
            result._relation_to_desired_max = LogLocation.MATCH
        else:  # time < times[1]
            result._relation_to_desired_max = LogLocation.TOO_LOW

        # and now the min
        if time < times[0]:
            result._relation_to_desired_min = LogLocation.TOO_LOW
        elif time == times[0]:
            # do nothing for now about data, may optimize to save off data later.
            result._relation_to_desired_min = LogLocation.MATCH
        else:  # time > times[0]
            result._relation_to_desired_min = LogLocation.TOO_HIGH

        # see if we got the min edge (max was checked above)
        if (prev_minmax == LogLocation.OUT_OF_RANGE_LOW
                and result.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH):
            pass  # //! No matches! Tell the dude and quit!
        elif (prev_minmax[0] == LogLocation.TOO_LOW
              and result._relation_to_desired_min != LogLocation.TOO_LOW):
            # We passed into our range via min. This is one.
            result.set_is_min(True)
            break

        prev_minmax = result.get_minmax()

    results.append(result)
예제 #15
0
파일: tgrep-m.py 프로젝트: cole-brown/tgrep
def wide_sweep(log, filesize, times, num_procs):
    """Binary search, with friends: narrow the bounds using parallel probes.

    Each pass starts one pessismistic_binary_search process per seek guess,
    waits for all of them, folds every result into the two nearest guesses via
    update_guess, and asks binary_search_guess for the next guesses.

    The loop ends when a result has LogLocation.MATCH in its get_minmax(), when
    the focus returned by binary_search_guess repeats, or when the two nearest
    guesses are closer than WIDE_SWEEP_CLOSE_ENOUGH.

    Args:
        log: open log file handed to every worker.
        filesize: location used for the initial upper guess.
        times: pair of desired min and max times, handed to every worker.
        num_procs: passed through to binary_search_guess.

    Returns:
        (hits, nearest_guesses): the matching results seen, and the pair of
        nearest lower/upper LogLocations.

    Also bumps stats['wide_sweep_loops'] once per pass and copies the workers'
    shared counters back into stats['seeks'] and stats['reads'].
    """
    # for the global counters...
    arr = Array('i', [stats['seeks'], stats['reads']])

    results = Queue()
    nearest_guesses = [
        LogLocation(0, datetime(1999, 1, 1, 0, 0, 0), LogLocation.TOO_LOW,
                    LogLocation.TOO_LOW),  # //! make y1k safe
        # A fixed far-future date: the former now().replace(year=3000) raises
        # ValueError when run on Feb 29, because 3000 is not a leap year.
        LogLocation(filesize, datetime(3000, 1, 1, 0, 0, 0),
                    LogLocation.TOO_HIGH,
                    LogLocation.TOO_HIGH)  # //! make y3k safe
    ]
    prev_focus, seek_guesses = binary_search_guess(
        nearest_guesses[0]._seek_loc, nearest_guesses[1]._seek_loc, num_procs)
    hits = []
    found = False
    # //! binary search until focal point of search is same
    while not found:
        stats['wide_sweep_loops'] += 1
        children = []
        for seek_loc in seek_guesses:
            worker = Process(target=pessismistic_binary_search,
                             args=(log, seek_loc, times, results, arr))
            worker.start()
            children.append(worker)
        # wait for all procs to finish before calculating next step
        for child in children:
            child.join()

        while not results.empty():
            result = results.get()
            if result is None:
                # A worker puts None when its chunk holds no newline; there is
                # nothing to learn from it, and calling get_minmax() on it
                # would raise AttributeError.
                continue
            if LogLocation.MATCH in result.get_minmax():
                print("found it!")  # //!
                hits.append(result)
                found = True
            update_guess(result,
                         nearest_guesses)  # updates nearest_guesses in place

        focus, seek_guesses = binary_search_guess(nearest_guesses[0]._seek_loc,
                                                  nearest_guesses[1]._seek_loc,
                                                  num_procs)

        if focus == prev_focus:
            print("steady state!")  # //!
            found = True
            break
        elif (nearest_guesses[1]._seek_loc -
              nearest_guesses[0]._seek_loc) < WIDE_SWEEP_CLOSE_ENOUGH:
            print("close enough!")  # //!
            found = True
            break
        prev_focus = focus

    print(hits)
    print(nearest_guesses)
    # Printed as one tuple, which is what the former print statement emitted.
    print((nearest_guesses[1]._seek_loc - nearest_guesses[0]._seek_loc,
           nearest_guesses[0]._seek_loc, nearest_guesses[1]._seek_loc))
    stats['seeks'] = arr[0]
    stats['reads'] = arr[1]

    return hits, nearest_guesses
예제 #16
0
파일: tgrep-m.py 프로젝트: spydez/tgrep
def optimistic_edge_search(log, guess, times, results):
  """Reads a chunk and checks whole thing for timestamps. Better when it's really close.

  Args:
    log: open log file; it is repositioned with seek() and read from.
    guess: LogLocation to start from. When its get_minmax() equals
      LogLocation.OUT_OF_RANGE_HIGH the chunk that ends at the guess is read,
      otherwise the chunk that starts at it.
    times: pair whose items 0 and 1 are the desired min and max times.
    results: object with append(); exactly one LogLocation is appended.

  The appended LogLocation describes the last line whose timestamp parsed,
  flagged with set_is_min/set_is_max when that line is an edge of the desired
  range. If no timestamp parsed, it is the placeholder built before the loop.
  Also bumps stats['seeks'] and stats['reads'].
  """
  stamp_span = 20 # //! magic 20: characters sliced off a line start to hold the timestamp

  seek_loc = guess._seek_loc
  if guess.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH:
    # we're looking for the max and we're above it, so read from a chunk away to here.
    seek_loc -= EDGE_SWEEP_CHUNK_SIZE
    if seek_loc < 0:
      seek_loc = 0
  # otherwise we're looking for the min and we're below it, so read starting here.
  log.seek(seek_loc)
  stats['seeks'] += 1
  chunk = log.read(EDGE_SWEEP_CHUNK_SIZE)
  stats['reads'] += 1

  prev_minmax = guess.get_minmax()
  result = LogLocation(0, datetime.min,
                       LogLocation.TOO_LOW,
                       LogLocation.TOO_HIGH) # an invalid result to start with
  chunk_loc = 0
  end_loc   = chunk.rfind('\n')
  while chunk_loc < end_loc:
    # find the nearest newline so we can find the timestamp; searching in
    # place avoids copying the rest of the chunk on every pass
    newline_at = chunk.find('\n', chunk_loc)
    if newline_at == -1:
      break # Can't find a newline; we're done.

    # Step to the start of the next line exactly once. The cursor used to be
    # advanced a second time after a successful parse, which could jump over
    # whole lines and miss an edge.
    chunk_loc = newline_at + 1

    # find the first bit of the line, e.g. "Feb 14 05:52:12 web0"
    # split it on the whitespace, e.g. ["Feb", "14", "05:52:12", "web0"]
    # join the first three back together again with ' ' as the separator
    # parse the thing!
    # //! need to research a better (faster?) way to do this
    try:
      time = parse_time(' '.join(chunk[chunk_loc : chunk_loc + stamp_span].split()[:3]))
    except ValueError: # not a time string found
      # we're ok with occasional non-time string lines. Might start the read
      # in the middle of a line, for example.
      print("time parse error")
      continue

    result._seek_loc  = seek_loc + chunk_loc
    result._timestamp = time

    # compare to desired to see if it's a better max
    if time > times[1]:
      result._relation_to_desired_min = LogLocation.TOO_HIGH
      result._relation_to_desired_max = LogLocation.TOO_HIGH
      # check to see if it's the edge
      if prev_minmax[1] != LogLocation.TOO_HIGH:
        # We passed out of range. This loc is where we want to /stop/ reading. Save it!
        result.set_is_max(True)
      break  # Can short-circuit if find a max, since we're reading buff beginning-to-end.
    elif time == times[1]:
      # do nothing for now about data, may optimize to save off data later.
      result._relation_to_desired_max = LogLocation.MATCH
    else: # time < times[1]
      result._relation_to_desired_max = LogLocation.TOO_LOW

    # and now the min
    if time < times[0]:
      result._relation_to_desired_min = LogLocation.TOO_LOW
    elif time == times[0]:
      # do nothing for now about data, may optimize to save off data later.
      result._relation_to_desired_min = LogLocation.MATCH
    else: # time > times[0]
      result._relation_to_desired_min = LogLocation.TOO_HIGH

    # see if we got the min edge (max was checked above)
    if (prev_minmax == LogLocation.OUT_OF_RANGE_LOW
        and result.get_minmax() == LogLocation.OUT_OF_RANGE_HIGH):
      pass # //! No matches! Tell the dude and quit!
    elif (prev_minmax[0] == LogLocation.TOO_LOW
          and result._relation_to_desired_min != LogLocation.TOO_LOW):
      # We passed into our range via min. This is one.
      result.set_is_min(True)
      break

    prev_minmax = result.get_minmax()

  results.append(result)