def test_lex_sort(self):
  """
  Check that flipping UUID segments makes the string form of v1 time UUIDs
  sort lexicographically.  A 230-year span is sampled so that the high time
  bits of the generated UUIDs must differ.
  """
  span_seconds = 230 * 365 * 24 * 60 * 60
  original = []
  for offset in range(0, span_seconds, 7250000):
    moment = datetime.datetime.now() + datetime.timedelta(seconds=offset)
    ktime = datetime_to_kronos_time(moment)
    # Two UUIDs for the same kronos time exercise tie-breaking on the
    # non-time bytes.
    pair = sorted([uuid_from_kronos_time(ktime),
                   uuid_from_kronos_time(ktime)])
    original.extend(pair)
  original = [str(u) for u in original]
  flipped = [sortable_time_uuid_str(u) for u in original]
  flipped.sort()
  round_tripped = [flip_uuid_parts(u) for u in flipped]
  self.assertEqual(original, round_tripped)
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Yield marshalled events from `stream`, starting strictly after the event
  with id `start_id` and including events timestamped up to `end_time`.
  """
  anchor = Event(start_id)
  upper = Event(uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST))
  events = self.db[namespace][stream]
  # Locate the half-open window [lo, hi) of matching events.
  lo = bisect.bisect_left(events, anchor)
  if lo >= len(events):
    return
  if events[lo] == anchor:
    # `start_id` itself is excluded from the results.
    lo += 1
  hi = bisect.bisect_right(events, upper)
  indices = (xrange(hi - 1, lo - 1, -1)
             if order == ResultOrder.DESCENDING
             else xrange(lo, hi))
  remaining = limit
  for idx in indices:
    if remaining <= 0:
      break
    remaining -= 1
    yield marshal.dumps(events[idx])
def retrieve(self, namespace, stream, start_time, end_time, start_id,
             configuration, order=ResultOrder.ASCENDING, limit=sys.maxint):
  """
  Retrieve all events for `stream` from `start_time` (inclusive) through
  `end_time` (inclusive).

  If `start_id` is given it takes precedence over `start_time`, and events
  strictly after `start_id` through `end_time` are returned; clients use it
  to resume a window after a disconnect.  `order` is ResultOrder.ASCENDING
  or ResultOrder.DESCENDING.  Returns an iterator over JSON-serialized
  (string) events.
  """
  if start_id:
    start_id = TimeUUID(start_id)
  else:
    start_id = uuid_from_kronos_time(start_time, _type=UUIDType.LOWEST)
  # An empty window: the resume point is already past `end_time`.
  if uuid_to_kronos_time(start_id) > end_time:
    return []
  return self._retrieve(namespace, stream, start_id, end_time, order, limit,
                        configuration)
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Retrieve events for `stream` between `start_id` and `end_time`.

  `stream` : The stream to return events for.
  `start_id` : Return events with id > `start_id`.
  `end_time` : Return events ending <= `end_time`.
  `order` : Whether to return the results in ResultOrder.ASCENDING
            or ResultOrder.DESCENDING time-order.
  `configuration` : A dictionary of settings to override any default
                    settings, such as number of shards or width of a
                    time interval.
  """
  stream = self.get_stream(namespace, stream, configuration)
  events = stream.iterator(start_id,
                           uuid_from_kronos_time(end_time,
                                                 _type=UUIDType.HIGHEST),
                           order == ResultOrder.DESCENDING, limit)
  events = events.__iter__()
  # NOTE(review): Python 2 generator idiom -- a StopIteration raised by
  # `events.next()` here (empty result) or in the loop below (iterator
  # exhausted) silently terminates this generator instead of propagating.
  # Under PEP 479 (Python 3.7+) this would raise RuntimeError.
  event = events.next()
  # If first event's ID is equal to `start_id`, skip it.
  if event.id != start_id:
    yield event.json
  while True:
    yield events.next().json
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Yield marshalled events from `stream`, beginning after the event whose id
  is `start_id` and stopping at events timestamped `end_time` (inclusive).
  """
  events = self.db[namespace][stream]
  low_marker = Event(start_id)
  high_marker = Event(uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST))
  # Binary-search the sorted event list for the requested window.
  lo = bisect.bisect_left(events, low_marker)
  if lo >= len(events):
    return
  if events[lo] == low_marker:
    # The event with id `start_id` is not part of the result set.
    lo += 1
  hi = bisect.bisect_right(events, high_marker)
  if order == ResultOrder.DESCENDING:
    positions = xrange(hi - 1, lo - 1, -1)
  else:
    positions = xrange(lo, hi)
  for pos in positions:
    if limit <= 0:
      break
    limit -= 1
    yield marshal.dumps(events[pos])
def __init__(self, namespace, stream, start_time, width, shard, descending,
             limit, read_size):
  """Set up iteration state for one shard of a stream's time interval."""
  self.namespace = namespace
  self.session = namespace.session
  self.key = StreamShard.get_key(stream, start_time, shard)
  self.descending = descending
  self.limit = limit
  self.read_size = read_size
  self._events_future = None
  if descending:
    # Descending iteration compares against the end of the interval.
    cmp_id = uuid_from_kronos_time(start_time + width, UUIDType.HIGHEST)
    cmp_id.descending = True
  else:
    cmp_id = uuid_from_kronos_time(start_time, UUIDType.LOWEST)
  self.cmp_id = cmp_id
def delete(self, namespace, stream, start_time, end_time, start_id,
           configuration):
  """
  Delete events in `stream` from `start_time` (via a LOWEST-type UUID) or
  strictly after `start_id`, through `end_time` inclusive.  Returns 0 when
  the window is empty, otherwise whatever the backend `_delete` returns.
  """
  if start_id:
    start_id = TimeUUID(start_id)
  else:
    start_id = uuid_from_kronos_time(start_time, _type=UUIDType.LOWEST)
  # Nothing to delete if the start point already lies past `end_time`.
  if uuid_to_kronos_time(start_id) > end_time:
    return 0
  return self._delete(namespace, stream, start_id, end_time, configuration)
def delete(self, namespace, stream, start_time, end_time, start_id,
           configuration):
  """
  Delete events in `stream` through `end_time` inclusive.  Without a
  `start_id`, the anchor is the HIGHEST-type UUID at `start_time - 1`, so
  every event at `start_time` itself (id > anchor) is included.
  """
  if start_id:
    start_id = TimeUUID(start_id)
  else:
    start_id = uuid_from_kronos_time(start_time - 1, _type=UUIDType.HIGHEST)
  # Empty window: the anchor is already past `end_time`.
  if uuid_to_kronos_time(start_id) > end_time:
    return 0
  return self._delete(namespace, stream, start_id, end_time, configuration)
def _delete(self, namespace, stream, start_id, end_time, configuration):
  """
  Delete events for `stream` between `start_id` and `end_time`.

  `stream` : The stream to delete events for.
  `start_id` : Delete events with id > `start_id`.
  `end_time` : Delete events ending <= `end_time`.
  `configuration` : A dictionary of settings overriding defaults such as
                    shard count or time-interval width.
  """
  stream = self.get_stream(namespace, stream, configuration)
  end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
  return stream.delete(start_id, end_id)
def validate_event_and_assign_id(event):
  """
  Ensure `event` carries a valid integer kronos time (defaulting to the
  current time) and attach a freshly generated time-based UUID as its id.
  Returns an (id, event) pair; raises InvalidEventTime for invalid times.
  """
  when = event.get(TIMESTAMP_FIELD)
  if when is None:
    when = epoch_time_to_kronos_time(time.time())
    event[TIMESTAMP_FIELD] = when
  elif type(when) not in (int, long):
    # Exact type() check: subclasses such as bool are rejected too.
    raise InvalidEventTime(when)
  # uuid1-like value: time bytes from the event, non-time bytes random.
  new_id = uuid_from_kronos_time(when)
  event[ID_FIELD] = str(new_id)
  return new_id, event
def _delete(self, namespace, stream, start_id, end_time, configuration):
  """
  Delete events from the SQLite `events` table for (`namespace`, `stream`)
  with id > `start_id` and time <= `end_time`.

  Returns a (deleted_row_count, errors) pair; errors is always empty here.
  """
  end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
  # Rows strictly after `start_id`: same timestamp with a larger
  # (sortable-string) uuid, or any later timestamp up to `end_time`.
  self.cursor.execute('''
    DELETE FROM events
    WHERE namespace = ? AND stream = ? AND
      ((time = ? AND uuid > ?) OR (time > ? AND time <= ?))''',
    (namespace, stream, start_id.time, sortable_time_uuid_str(start_id),
     start_id.time, end_id.time))
  # Number of rows removed by the DELETE above, captured before commit.
  rowcount = self.cursor.rowcount
  self.connection.commit()
  return rowcount, []
def validate_event_and_assign_id(event):
  """
  Validate the event's time and stamp it with a time-based UUID.

  A missing time defaults to the current kronos time; a non-integer time
  raises InvalidEventTime.  Returns (id, event).
  """
  event_time = event.get(TIMESTAMP_FIELD)
  if event_time is None:
    event_time = epoch_time_to_kronos_time(time.time())
    event[TIMESTAMP_FIELD] = event_time
  elif type(event_time) not in (int, long):
    # Exact type() comparison (not isinstance), so bool is rejected.
    raise InvalidEventTime(event_time)
  # Build a uuid1-like id: time bytes from the event, rest random.
  assigned_id = uuid_from_kronos_time(event_time)
  event[ID_FIELD] = str(assigned_id)
  return assigned_id, event
def _delete(self, namespace, stream, start_id, end_time, configuration):
  """Delete events with id > `start_id` and time <= `end_time`."""
  anchor = Event(start_id)
  upper = Event(uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST))
  events = self.db[namespace][stream]
  # Find the window [lo, hi) of events to remove.
  lo = bisect.bisect_left(events, anchor)
  if lo >= len(events):
    return 0, []
  if events[lo] == anchor:
    # `start_id` itself is preserved.
    lo += 1
  hi = bisect.bisect_right(events, upper)
  del events[lo:hi]
  return max(0, hi - lo), []
def _delete(self, namespace, stream, start_id, end_time, configuration):
  """Delete events with id > `start_id` and time <= `end_time`."""
  stream_events = self.db[namespace][stream]
  low_marker = Event(start_id)
  high_marker = Event(
    uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST))
  # Binary-search for the slice of events covered by the window.
  lo = bisect.bisect_left(stream_events, low_marker)
  if lo >= len(stream_events):
    return 0, []
  if stream_events[lo] == low_marker:
    # Keep the event with id `start_id` itself.
    lo += 1
  hi = bisect.bisect_right(stream_events, high_marker)
  removed = max(0, hi - lo)
  del stream_events[lo:hi]
  return removed, []
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Yield raw event blobs from the SQLite `events` table for
  (`namespace`, `stream`) with id > `start_id` and time <= `end_time`,
  ordered by (time, uuid) in `order`, yielding at most `limit` rows.

  Fix: the original built `start_id_event = Event(start_id)` and never
  used it; the dead assignment has been removed.
  """
  end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
  direction = 'ASC' if order == ResultOrder.ASCENDING else 'DESC'
  # Rows strictly after `start_id`: same timestamp with a larger
  # (sortable-string) uuid, or any later timestamp up to `end_time`.
  for event, in self.cursor.execute('''
      SELECT event FROM events
      WHERE namespace = ? AND stream = ? AND
        ((time = ? AND uuid > ?) OR (time > ? AND time <= ?))
      ORDER BY time %s, uuid %s''' % (direction, direction),
      (namespace, stream, start_id.time, sortable_time_uuid_str(start_id),
       start_id.time, end_id.time)):
    if limit == 0:
      return
    else:
      limit -= 1
    yield event
def uuid_from_time(time, uuid_type=UUIDType.RANDOM):
  """Convert an epoch timestamp into a time UUID of the requested type."""
  kronos_time = epoch_time_to_kronos_time(time)
  return uuid_from_kronos_time(kronos_time, uuid_type)
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Yield JSON-serialized events from `stream`, starting after the event with
  id `start_id`, up to and including events timestamped `end_time`.

  Events are pulled from Elasticsearch in scroll batches.  At most `limit`
  events are yielded.  Fixes: (1) the limit check only broke out of the
  per-batch loop, so the outer scroll loop kept fetching and yielding past
  `limit` (and a `limit` of 0 yielded events); (2) the server-side scroll
  context was leaked when the consumer stopped iterating early -- cleanup
  now runs in a `finally` block.
  """
  indices = self.index_manager.get_aliases(namespace,
                                           uuid_to_kronos_time(start_id),
                                           end_time)
  if not indices:
    return
  end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
  end_id.descending = start_id.descending = descending = (
    order == ResultOrder.DESCENDING)
  start_time = uuid_to_kronos_time(start_id)
  body_query = {
    'query': {
      'filtered': {
        'query': {'match_all': {}},
        'filter': {
          'range': {TIMESTAMP_FIELD: {'gte': start_time, 'lte': end_time}}
        }
      }
    }
  }
  order = 'desc' if descending else 'asc'
  sort_query = [
    '%s:%s' % (TIMESTAMP_FIELD, order),
    '%s:%s' % (ID_FIELD, order)
  ]
  # Last id yielded, used to skip events at or before the window boundary.
  last_id = end_id if descending else start_id
  scroll_id = None
  try:
    # Stop scrolling as soon as the limit is exhausted.
    while limit > 0:
      size = max(min(limit, configuration['read_size']) / self.shards, 10)
      if scroll_id is None:
        res = self.es.search(index=indices,
                             doc_type=stream,
                             size=size,
                             body=body_query,
                             sort=sort_query,
                             _source=True,
                             scroll='1m',
                             ignore=[400, 404],
                             allow_no_indices=True,
                             ignore_unavailable=True)
      else:
        res = self.es.scroll(scroll_id, scroll='1m')
      if '_scroll_id' not in res:
        break
      scroll_id = res['_scroll_id']
      hits = res.get('hits', {}).get('hits')
      if not hits:
        break
      for hit in hits:
        _id = TimeUUID(hit['_id'], descending=descending)
        if _id <= last_id:
          # At or before the boundary (duplicate); skip.
          continue
        last_id = _id
        yield json.dumps(hit['_source'])
        limit -= 1
        if limit <= 0:
          break
  finally:
    # Free the server-side scroll context even if the consumer stops early.
    if scroll_id is not None:
      self.es.clear_scroll(scroll_id)
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Yield JSON-serialized events from `stream`, starting after the event with
  id `start_id`, up to and including events timestamped `end_time`.  The
  logstash timestamp bookkeeping field is stripped from each event.

  Events are pulled from Elasticsearch in scroll batches.  At most `limit`
  events are yielded.  Fixes: (1) the limit check only broke out of the
  per-batch loop, so the outer scroll loop kept fetching and yielding past
  `limit` (and a `limit` of 0 yielded events); (2) the server-side scroll
  context was leaked when the consumer stopped iterating early -- cleanup
  now runs in a `finally` block.
  """
  indices = self.index_manager.get_aliases(namespace,
                                           uuid_to_kronos_time(start_id),
                                           end_time)
  if not indices:
    return
  end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
  end_id.descending = start_id.descending = descending = (
    order == ResultOrder.DESCENDING)
  start_time = uuid_to_kronos_time(start_id)
  body_query = {
    'query': {
      'filtered': {
        'query': {
          'match_all': {}
        },
        'filter': {
          'range': {
            TIMESTAMP_FIELD: {
              'gte': start_time,
              'lte': end_time
            }
          }
        }
      }
    }
  }
  order = 'desc' if descending else 'asc'
  sort_query = [
    '%s:%s' % (TIMESTAMP_FIELD, order),
    '%s:%s' % (ID_FIELD, order)
  ]
  # Last id yielded, used to skip events at or before the window boundary.
  last_id = end_id if descending else start_id
  scroll_id = None
  try:
    # Stop scrolling as soon as the limit is exhausted.
    while limit > 0:
      size = max(
        min(limit, configuration['read_size']) / self.shards, 10)
      if scroll_id is None:
        res = self.es.search(index=indices,
                             doc_type=stream,
                             size=size,
                             body=body_query,
                             sort=sort_query,
                             _source=True,
                             scroll='1m',
                             ignore=[400, 404],
                             allow_no_indices=True,
                             ignore_unavailable=True)
      else:
        res = self.es.scroll(scroll_id, scroll='1m')
      if '_scroll_id' not in res:
        break
      scroll_id = res['_scroll_id']
      hits = res.get('hits', {}).get('hits')
      if not hits:
        break
      for hit in hits:
        _id = TimeUUID(hit['_id'], descending=descending)
        if _id <= last_id:
          # At or before the boundary (duplicate); skip.
          continue
        last_id = _id
        event = hit['_source']
        # Drop the logstash bookkeeping field before returning the event.
        del event[LOGSTASH_TIMESTAMP_FIELD]
        yield json.dumps(event)
        limit -= 1
        if limit <= 0:
          break
  finally:
    # Free the server-side scroll context even if the consumer stops early.
    if scroll_id is not None:
      self.es.clear_scroll(scroll_id)