예제 #1
0
    def test_lex_sort(self):
        """
        Verify that the flipped UUID string form sorts lexicographically in
        the same order as the v1 time UUIDs it was built from.

        Sample times are spread over 230 years so that the high bits of the
        time UUIDs differ between samples.
        """
        span_seconds = 230 * 365 * 24 * 60 * 60
        step_seconds = 7250000

        expected = []
        for offset in range(0, span_seconds, step_seconds):
            moment = (datetime.datetime.now() +
                      datetime.timedelta(seconds=offset))
            kronos_time = datetime_to_kronos_time(moment)
            # Two UUIDs for the same instant, stored in their sorted order.
            pair = [uuid_from_kronos_time(kronos_time),
                    uuid_from_kronos_time(kronos_time)]
            pair.sort()
            expected.extend(str(item) for item in pair)

        # Build the sortable strings, sort them as plain text, then run each
        # one through flip_uuid_parts before comparing with the originals.
        sortable = sorted(sortable_time_uuid_str(text) for text in expected)
        round_tripped = [flip_uuid_parts(text) for text in sortable]

        self.assertEqual(expected, round_tripped)
예제 #2
0
  def test_lex_sort(self):
    """
    Verify that the flipped UUID string form sorts lexicographically in the
    same order as the v1 time UUIDs it was built from.

    Sample times are spread over 230 years so that the high bits of the time
    UUIDs differ between samples.
    """
    span_seconds = 230 * 365 * 24 * 60 * 60
    step_seconds = 7250000

    expected = []
    for offset in range(0, span_seconds, step_seconds):
      moment = datetime.datetime.now() + datetime.timedelta(seconds=offset)
      kronos_time = datetime_to_kronos_time(moment)
      # Two UUIDs for the same instant, stored in their sorted order.
      pair = [uuid_from_kronos_time(kronos_time),
              uuid_from_kronos_time(kronos_time)]
      pair.sort()
      expected.extend(str(item) for item in pair)

    # Build the sortable strings, sort them as plain text, then run each one
    # through flip_uuid_parts before comparing with the originals.
    sortable = sorted(sortable_time_uuid_str(text) for text in expected)
    round_tripped = [flip_uuid_parts(text) for text in sortable]

    self.assertEqual(expected, round_tripped)
예제 #3
0
  def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                configuration):
    """
    Generate the marshalled events of `stream` that sort strictly after
    `start_id` and no later than the highest id possible at `end_time`.

    At most `limit` events are produced, in ascending order unless `order` is
    ResultOrder.DESCENDING. Assumes `self.db[namespace][stream]` is a list
    kept in sorted order, since it is searched with `bisect`.
    """
    lower_probe = Event(start_id)
    upper_probe = Event(uuid_from_kronos_time(end_time,
                                              _type=UUIDType.HIGHEST))
    events = self.db[namespace][stream]

    # Locate the slice [first, last) holding the requested events.
    first = bisect.bisect_left(events, lower_probe)
    if first >= len(events):
      # Nothing is stored at or after `start_id`.
      return
    if events[first] == lower_probe:
      # `start_id` is exclusive, so step past an exact match.
      first += 1
    last = bisect.bisect_right(events, upper_probe)

    descending = order == ResultOrder.DESCENDING
    positions = xrange(last - 1, first - 1, -1) if descending else xrange(first, last)

    remaining = limit
    for position in positions:
      if remaining <= 0:
        return
      remaining -= 1
      yield marshal.dumps(events[position])
예제 #4
0
파일: base.py 프로젝트: tibbetts/chronology
    def retrieve(self, namespace, stream, start_time, end_time, start_id,
                 configuration, order=ResultOrder.ASCENDING,
                 limit=sys.maxint):
        """
        Retrieve the events of `stream` from `start_time` (inclusive) up to
        `end_time` (inclusive).

        When `start_id` is given it replaces `start_time`, and events from
        `start_id` (exclusive) up to `end_time` (inclusive) are returned. This
        is meant for clients that were disconnected before receiving every
        event of the requested window. `order` is ResultOrder.ASCENDING or
        ResultOrder.DESCENDING.

        Returns an empty list when the starting point lies after `end_time`;
        otherwise returns whatever `self._retrieve` produces (documented as
        an iterator over JSON-serialized event strings).
        """
        if start_id:
            start_id = TimeUUID(start_id)
        else:
            # No resume point given: begin at the lowest id of `start_time`.
            start_id = uuid_from_kronos_time(start_time,
                                             _type=UUIDType.LOWEST)

        starts_after_end = uuid_to_kronos_time(start_id) > end_time
        if starts_after_end:
            return []
        return self._retrieve(namespace, stream, start_id, end_time, order,
                              limit, configuration)
예제 #5
0
 def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
               configuration):
   """
   Retrieve events for `stream` between `start_id` and `end_time`.
   `stream` : The stream to return events for.
   `start_id` : Return events with id > `start_id`.
   `end_time` : Return events ending <= `end_time`.
   `order` : Whether to return the results in ResultOrder.ASCENDING
             or ResultOrder.DESCENDING time-order.
   `limit` : Passed through to the stream iterator unchanged.
   `configuration` : A dictionary of settings to override any default
                     settings, such as number of shards or width of a
                     time interval.

   Yields the `json` attribute of each event. The generator ends normally
   when the underlying iterator is exhausted, including when it is empty.
   """
   stream = self.get_stream(namespace, stream, configuration)
   events = iter(stream.iterator(start_id,
                                 uuid_from_kronos_time(end_time,
                                                       _type=UUIDType.HIGHEST),
                                 order == ResultOrder.DESCENDING, limit))

   # Pull the first event with a sentinel default so that an empty iterator
   # ends the generator cleanly instead of leaking StopIteration out of the
   # generator body (a RuntimeError under PEP 479).
   exhausted = object()
   event = next(events, exhausted)
   if event is exhausted:
     return
   # If first event's ID is equal to `start_id`, skip it.
   if event.id != start_id:
     yield event.json
   for event in events:
     yield event.json
예제 #6
0
 def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
               configuration):
   """
   Retrieve events for `stream` between `start_id` and `end_time`.
   `stream` : The stream to return events for.
   `start_id` : Return events with id > `start_id`.
   `end_time` : Return events ending <= `end_time`.
   `order` : Whether to return the results in ResultOrder.ASCENDING
             or ResultOrder.DESCENDING time-order.
   `limit` : Passed through to the stream iterator unchanged.
   `configuration` : A dictionary of settings to override any default
                     settings, such as number of shards or width of a
                     time interval.

   Yields the `json` attribute of each event. The generator ends normally
   when the underlying iterator is exhausted, including when it is empty.
   """
   stream = self.get_stream(namespace, stream, configuration)
   events = iter(stream.iterator(start_id,
                                 uuid_from_kronos_time(end_time,
                                                       _type=UUIDType.HIGHEST),
                                 order == ResultOrder.DESCENDING, limit))

   # Pull the first event with a sentinel default so that an empty iterator
   # ends the generator cleanly instead of leaking StopIteration out of the
   # generator body (a RuntimeError under PEP 479).
   exhausted = object()
   event = next(events, exhausted)
   if event is exhausted:
     return
   # If first event's ID is equal to `start_id`, skip it.
   if event.id != start_id:
     yield event.json
   for event in events:
     yield event.json
예제 #7
0
    def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                  configuration):
        """
        Generate the marshalled events of `stream` that sort strictly after
        `start_id` and no later than the highest id possible at `end_time`.

        At most `limit` events are produced, in ascending order unless
        `order` is ResultOrder.DESCENDING. Assumes
        `self.db[namespace][stream]` is a list kept in sorted order, since it
        is searched with `bisect`.
        """
        lower_probe = Event(start_id)
        upper_probe = Event(
            uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST))
        events = self.db[namespace][stream]

        # Locate the slice [first, last) holding the requested events.
        first = bisect.bisect_left(events, lower_probe)
        if first >= len(events):
            # Nothing is stored at or after `start_id`.
            return
        if events[first] == lower_probe:
            # `start_id` is exclusive, so step past an exact match.
            first += 1
        last = bisect.bisect_right(events, upper_probe)

        descending = order == ResultOrder.DESCENDING
        positions = (xrange(last - 1, first - 1, -1) if descending
                     else xrange(first, last))

        remaining = limit
        for position in positions:
            if remaining <= 0:
                return
            remaining -= 1
            yield marshal.dumps(events[position])
예제 #8
0
  def __init__(self, namespace, stream, start_time, width, shard, descending,
               limit, read_size):
    """
    Record the read parameters for one shard of `stream` and compute the UUID
    (`cmp_id`) associated with the shard's interval.

    `start_time` and `width` delimit the interval; `descending` selects which
    end of the interval `cmp_id` is taken from.
    """
    self.namespace = namespace
    self.session = namespace.session
    self.key = StreamShard.get_key(stream, start_time, shard)
    self.descending = descending
    self.limit = limit
    self.read_size = read_size

    if not descending:
      # Ascending reads compare against the start of the interval.
      self.cmp_id = uuid_from_kronos_time(start_time, UUIDType.LOWEST)
    else:
      # Descending reads compare against the end of the interval instead.
      interval_end = start_time + width
      self.cmp_id = uuid_from_kronos_time(interval_end, UUIDType.HIGHEST)
      self.cmp_id.descending = True

    # Starts out empty; nothing in the constructor populates it.
    self._events_future = None
예제 #9
0
파일: base.py 프로젝트: sbisker/chronology
 def delete(self, namespace, stream, start_time, end_time, start_id,
            configuration):
   """
   Delete events of `stream` up to `end_time`, starting either after
   `start_id` (when given) or from the lowest id at `start_time`.

   Returns 0 without calling `_delete` when the starting point lies after
   `end_time`; otherwise returns the result of `self._delete`.
   NOTE(review): some `_delete` implementations return a `(count, errors)`
   tuple while this early exit returns a bare 0 -- confirm callers cope.
   """
   if start_id:
     start_id = TimeUUID(start_id)
   else:
     start_id = uuid_from_kronos_time(start_time, _type=UUIDType.LOWEST)

   starts_after_end = uuid_to_kronos_time(start_id) > end_time
   if starts_after_end:
     return 0
   return self._delete(namespace, stream, start_id, end_time, configuration)
예제 #10
0
파일: base.py 프로젝트: tibbetts/chronology
 def delete(self, namespace, stream, start_time, end_time, start_id,
            configuration):
     """
     Delete events of `stream` up to `end_time`, starting either after
     `start_id` (when given) or after the highest id at `start_time - 1`.

     Returns 0 without calling `_delete` when the starting point lies after
     `end_time`; otherwise returns the result of `self._delete`.
     NOTE(review): some `_delete` implementations return a `(count, errors)`
     tuple while this early exit returns a bare 0 -- confirm callers cope.
     """
     if start_id:
         start_id = TimeUUID(start_id)
     else:
         # The bound handed to `_delete` is exclusive, so begin just below
         # `start_time`.
         previous_tick = start_time - 1
         start_id = uuid_from_kronos_time(previous_tick,
                                          _type=UUIDType.HIGHEST)

     starts_after_end = uuid_to_kronos_time(start_id) > end_time
     if starts_after_end:
         return 0
     return self._delete(namespace, stream, start_id, end_time,
                         configuration)
예제 #11
0
 def _delete(self, namespace, stream, start_id, end_time, configuration):
   """
   Remove the events of `stream` lying between `start_id` and `end_time`.
   `stream` : The stream whose events are removed.
   `start_id` : Only events with id > `start_id` are removed.
   `end_time` : Only events ending <= `end_time` are removed.
   `configuration` : A dictionary of settings to override any default
                     settings, such as number of shards or width of a
                     time interval.

   Returns whatever the stream object's `delete` call returns.
   """
   target = self.get_stream(namespace, stream, configuration)
   # Upper bound: the highest id that can exist at `end_time`.
   end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
   return target.delete(start_id, end_id)
예제 #12
0
 def _delete(self, namespace, stream, start_id, end_time, configuration):
   """
   Remove the events of `stream` lying between `start_id` and `end_time`.
   `stream` : The stream whose events are removed.
   `start_id` : Only events with id > `start_id` are removed.
   `end_time` : Only events ending <= `end_time` are removed.
   `configuration` : A dictionary of settings to override any default
                     settings, such as number of shards or width of a
                     time interval.

   Returns whatever the stream object's `delete` call returns.
   """
   target = self.get_stream(namespace, stream, configuration)
   # Upper bound: the highest id that can exist at `end_time`.
   end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
   return target.delete(start_id, end_id)
예제 #13
0
def validate_event_and_assign_id(event):
  """
  Check the event's timestamp and attach an id derived from it.

  A missing timestamp is filled in from the current time. A timestamp whose
  type is not exactly `int` or `long` raises InvalidEventTime. The event is
  modified in place and returned together with the generated id, as
  `(id, event)`.
  """
  timestamp = event.get(TIMESTAMP_FIELD)

  if timestamp is not None:
    if type(timestamp) not in (int, long):
      raise InvalidEventTime(timestamp)
  else:
    # No time supplied by the caller: stamp the event with "now".
    timestamp = epoch_time_to_kronos_time(time.time())
    event[TIMESTAMP_FIELD] = timestamp

  # The id is generated from the event time and stored in its string form.
  generated_id = uuid_from_kronos_time(timestamp)
  event[ID_FIELD] = str(generated_id)
  return generated_id, event
예제 #14
0
 def _delete(self, namespace, stream, start_id, end_time, configuration):
   """
   Delete the rows of `stream` in `namespace` that come after `start_id` and
   whose time is no later than that of the highest id at `end_time`.

   Rows sharing `start_id`'s time are removed only when their uuid column
   sorts after `start_id`'s sortable string form. The deletion is committed
   before returning `(rowcount, [])`.
   """
   upper_bound = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
   lower_time = start_id.time
   parameters = (namespace,
                 stream,
                 lower_time,
                 sortable_time_uuid_str(start_id),
                 lower_time,
                 upper_bound.time)
   self.cursor.execute('''
       DELETE FROM events
       WHERE namespace = ? AND
             stream = ? AND
             ((time = ? AND uuid > ?) OR
              (time > ? AND time <= ?))''',
       parameters)
   deleted = self.cursor.rowcount
   self.connection.commit()
   return deleted, []
예제 #15
0
def validate_event_and_assign_id(event):
    """
    Check the event's timestamp and attach an id derived from it.

    A missing timestamp is filled in from the current time. A timestamp whose
    type is not exactly `int` or `long` raises InvalidEventTime. The event is
    modified in place and returned together with the generated id, as
    `(id, event)`.
    """
    timestamp = event.get(TIMESTAMP_FIELD)

    if timestamp is not None:
        if type(timestamp) not in (int, long):
            raise InvalidEventTime(timestamp)
    else:
        # No time supplied by the caller: stamp the event with "now".
        timestamp = epoch_time_to_kronos_time(time.time())
        event[TIMESTAMP_FIELD] = timestamp

    # The id is generated from the event time and stored in its string form.
    generated_id = uuid_from_kronos_time(timestamp)
    event[ID_FIELD] = str(generated_id)
    return generated_id, event
예제 #16
0
  def _delete(self, namespace, stream, start_id, end_time, configuration):
    """
    Remove the events of `stream` whose id sorts strictly after `start_id`
    and no later than the highest id possible at `end_time`.

    Returns `(number_removed, [])`. Assumes `self.db[namespace][stream]` is a
    list kept in sorted order, since it is searched with `bisect`.
    """
    lower_probe = Event(start_id)
    upper_probe = Event(uuid_from_kronos_time(end_time,
                                              _type=UUIDType.HIGHEST))
    events = self.db[namespace][stream]

    # Locate the slice [first, last) that has to go.
    first = bisect.bisect_left(events, lower_probe)
    if first >= len(events):
      # Nothing is stored at or after `start_id`.
      return 0, []
    if events[first] == lower_probe:
      # `start_id` itself is kept.
      first += 1
    last = bisect.bisect_right(events, upper_probe)

    # Clamp at zero in case the upper bound falls before the lower one.
    removed = max(0, last - first)
    del events[first:last]
    return removed, []
예제 #17
0
    def _delete(self, namespace, stream, start_id, end_time, configuration):
        """
        Remove the events of `stream` whose id sorts strictly after
        `start_id` and no later than the highest id possible at `end_time`.

        Returns `(number_removed, [])`. Assumes `self.db[namespace][stream]`
        is a list kept in sorted order, since it is searched with `bisect`.
        """
        lower_probe = Event(start_id)
        upper_probe = Event(
            uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST))
        events = self.db[namespace][stream]

        # Locate the slice [first, last) that has to go.
        first = bisect.bisect_left(events, lower_probe)
        if first >= len(events):
            # Nothing is stored at or after `start_id`.
            return 0, []
        if events[first] == lower_probe:
            # `start_id` itself is kept.
            first += 1
        last = bisect.bisect_right(events, upper_probe)

        # Clamp at zero in case the upper bound falls before the lower one.
        removed = max(0, last - first)
        del events[first:last]
        return removed, []
예제 #18
0
  def retrieve(self, namespace, stream, start_time, end_time, start_id,
               configuration, order=ResultOrder.ASCENDING, limit=sys.maxint):
    """
    Retrieve the events of `stream` from `start_time` (inclusive) up to
    `end_time` (inclusive).

    When `start_id` is given it replaces `start_time`, and events from
    `start_id` (exclusive) up to `end_time` (inclusive) are returned. This is
    meant for clients that were disconnected before receiving every event of
    the requested window. `order` is ResultOrder.ASCENDING or
    ResultOrder.DESCENDING.

    Returns an empty list when the starting point lies after `end_time`;
    otherwise returns whatever `self._retrieve` produces (documented as an
    iterator over JSON-serialized event strings).
    """
    if start_id:
      start_id = TimeUUID(start_id)
    else:
      # No resume point given: begin at the lowest id of `start_time`.
      start_id = uuid_from_kronos_time(start_time, _type=UUIDType.LOWEST)

    starts_after_end = uuid_to_kronos_time(start_id) > end_time
    if starts_after_end:
      return []
    return self._retrieve(namespace, stream, start_id, end_time, order, limit,
                          configuration)
예제 #19
0
  def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                configuration):
    """
    Yield the stored `event` column of rows in `stream` that come after
    `start_id` and whose time is no later than that of the highest id at
    `end_time`.

    Rows sharing `start_id`'s time are returned only when their uuid column
    sorts after `start_id`'s sortable string form. Rows are ordered by time
    and then uuid, ascending when `order` is ResultOrder.ASCENDING and
    descending otherwise. Iteration stops once `limit` rows have been
    yielded.
    """
    end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
    # Only these two fixed literals are ever interpolated into the SQL text;
    # all caller-supplied values go through bound parameters.
    direction = 'ASC' if order == ResultOrder.ASCENDING else 'DESC'

    for event, in self.cursor.execute('''
        SELECT event FROM events
        WHERE namespace = ? AND
              stream = ? AND
              ((time = ? AND uuid > ?) OR
               (time > ? AND time <= ?))
        ORDER BY time %s, uuid %s''' % (direction, direction),
        (namespace,
         stream,
         start_id.time,
         sortable_time_uuid_str(start_id),
         start_id.time,
         end_id.time)):
      if limit == 0:
        return
      limit -= 1
      yield event
예제 #20
0
def uuid_from_time(time, uuid_type=UUIDType.RANDOM):
  """
  Build a UUID for `time` by converting it with `epoch_time_to_kronos_time`
  and handing the result, together with `uuid_type`, to
  `uuid_from_kronos_time`.
  """
  kronos_time = epoch_time_to_kronos_time(time)
  return uuid_from_kronos_time(kronos_time, uuid_type)
예제 #21
0
def uuid_from_time(time, uuid_type=UUIDType.RANDOM):
  """
  Build a UUID for `time` by converting it with `epoch_time_to_kronos_time`
  and handing the result, together with `uuid_type`, to
  `uuid_from_kronos_time`.
  """
  kronos_time = epoch_time_to_kronos_time(time)
  return uuid_from_kronos_time(kronos_time, uuid_type)
예제 #22
0
  def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                configuration):
    """
    Yield events from stream starting after the event with id `start_id` until
    and including events with timestamp `end_time`.

    Events are fetched page by page through a scroll search sorted by
    timestamp and then id, and each one is yielded as a JSON string. At most
    `limit` events are yielded across all pages; a non-positive `limit`
    yields nothing. The scroll context is cleared when the generator
    finishes, fails, or is closed early by the consumer.
    """
    indices = self.index_manager.get_aliases(namespace,
                                             uuid_to_kronos_time(start_id),
                                             end_time)
    if not indices:
      return

    end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
    end_id.descending = start_id.descending = descending = (
      order == ResultOrder.DESCENDING)

    start_time = uuid_to_kronos_time(start_id)
    body_query = {
      'query': {
        'filtered': {
          'query': {'match_all': {}},
          'filter': {
            'range': {TIMESTAMP_FIELD: {'gte': start_time, 'lte': end_time}}
            }
          }
        }
      }
    order = 'desc' if descending else 'asc'
    sort_query = [
      '%s:%s' % (TIMESTAMP_FIELD, order),
      '%s:%s' % (ID_FIELD, order)
      ]

    last_id = end_id if descending else start_id
    scroll_id = None
    try:
      # Checking `limit` on every page is what stops the scroll: the `break`
      # in the inner loop only leaves that loop, so without this condition
      # the next page would be fetched and yielded past the limit.
      while limit > 0:
        # Floor division keeps `size` an integer.
        size = max(min(limit, configuration['read_size']) // self.shards, 10)
        if scroll_id is None:
          res = self.es.search(index=indices,
                               doc_type=stream,
                               size=size,
                               body=body_query,
                               sort=sort_query,
                               _source=True,
                               scroll='1m',
                               ignore=[400, 404],
                               allow_no_indices=True,
                               ignore_unavailable=True)
        else:
          res = self.es.scroll(scroll_id, scroll='1m')
        if '_scroll_id' not in res:
          break
        scroll_id = res['_scroll_id']
        hits = res.get('hits', {}).get('hits')
        if not hits:
          break

        for hit in hits:
          _id = TimeUUID(hit['_id'], descending=descending)
          # Skip anything that does not sort after the last id seen.
          # NOTE(review): relies on TimeUUID honouring its `descending` flag
          # in comparisons -- confirm.
          if _id <= last_id:
            continue
          last_id = _id
          event = hit['_source']
          yield json.dumps(event)
          limit -= 1
          if limit <= 0:
            break
    finally:
      # Runs on normal exhaustion, on errors, and when the generator is
      # closed early, so the scroll context is never left open.
      if scroll_id is not None:
        self.es.clear_scroll(scroll_id)
예제 #23
0
    def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                  configuration):
        """
        Yield events from stream starting after the event with id `start_id`
        until and including events with timestamp `end_time`.

        Events are fetched page by page through a scroll search sorted by
        timestamp and then id. The field named by LOGSTASH_TIMESTAMP_FIELD is
        removed from each event before it is yielded as a JSON string. At
        most `limit` events are yielded across all pages; a non-positive
        `limit` yields nothing. The scroll context is cleared when the
        generator finishes, fails, or is closed early by the consumer.
        """
        indices = self.index_manager.get_aliases(namespace,
                                                 uuid_to_kronos_time(start_id),
                                                 end_time)
        if not indices:
            return

        end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
        end_id.descending = start_id.descending = descending = (
            order == ResultOrder.DESCENDING)

        start_time = uuid_to_kronos_time(start_id)
        body_query = {
            'query': {
                'filtered': {
                    'query': {
                        'match_all': {}
                    },
                    'filter': {
                        'range': {
                            TIMESTAMP_FIELD: {
                                'gte': start_time,
                                'lte': end_time
                            }
                        }
                    }
                }
            }
        }
        order = 'desc' if descending else 'asc'
        sort_query = [
            '%s:%s' % (TIMESTAMP_FIELD, order),
            '%s:%s' % (ID_FIELD, order)
        ]

        last_id = end_id if descending else start_id
        scroll_id = None
        try:
            # Checking `limit` on every page is what stops the scroll: the
            # `break` in the inner loop only leaves that loop, so without
            # this condition the next page would be fetched and yielded past
            # the limit.
            while limit > 0:
                # Floor division keeps `size` an integer.
                size = max(
                    min(limit, configuration['read_size']) // self.shards, 10)
                if scroll_id is None:
                    res = self.es.search(index=indices,
                                         doc_type=stream,
                                         size=size,
                                         body=body_query,
                                         sort=sort_query,
                                         _source=True,
                                         scroll='1m',
                                         ignore=[400, 404],
                                         allow_no_indices=True,
                                         ignore_unavailable=True)
                else:
                    res = self.es.scroll(scroll_id, scroll='1m')
                if '_scroll_id' not in res:
                    break
                scroll_id = res['_scroll_id']
                hits = res.get('hits', {}).get('hits')
                if not hits:
                    break

                for hit in hits:
                    _id = TimeUUID(hit['_id'], descending=descending)
                    # Skip anything that does not sort after the last id
                    # seen. NOTE(review): relies on TimeUUID honouring its
                    # `descending` flag in comparisons -- confirm.
                    if _id <= last_id:
                        continue
                    last_id = _id
                    event = hit['_source']
                    del event[LOGSTASH_TIMESTAMP_FIELD]
                    yield json.dumps(event)
                    limit -= 1
                    if limit <= 0:
                        break
        finally:
            # Runs on normal exhaustion, on errors, and when the generator is
            # closed early, so the scroll context is never left open.
            if scroll_id is not None:
                self.es.clear_scroll(scroll_id)