Example #1
    def test_descending(self):
        uuid_strs = list(get_str_uuids(10000))
        uuids = sorted(map(lambda s: TimeUUID(s), uuid_strs))
        descending_uuids = sorted(
            map(lambda s: TimeUUID(s, descending=True), uuid_strs))
        self.assertEqual(uuids, descending_uuids[::-1])

        for uu, duu in zip(uuids, descending_uuids[::-1]):
            self.assertTrue(uu == duu)
            self.assertTrue(uu <= duu)
            self.assertTrue(uu >= duu)
            self.assertFalse(uu != duu)
Example #2
    def retrieve(self,
                 namespace,
                 stream,
                 start_time,
                 end_time,
                 start_id,
                 configuration,
                 order=ResultOrder.ASCENDING,
                 limit=sys.maxint):
        """
    Retrieves all the events for `stream` from `start_time` (inclusive) till
    `end_time` (inclusive). Alternatively to `start_time`, `start_id` can be
    provided, and then all events from `start_id` (exclusive) till `end_time`
    (inclusive) are returned. `start_id` should be used in cases when the client
    got disconnected from the server before all the events in the requested
    time window had been returned. `order` can be one of ResultOrder.ASCENDING
    or ResultOrder.DESCENDING.

    Returns an iterator over all JSON serialized (strings) events.
    """
        if not start_id:
            start_id = uuid_from_kronos_time(start_time, _type=UUIDType.LOWEST)
        else:
            start_id = TimeUUID(start_id)
        if uuid_to_kronos_time(start_id) > end_time:
            return []
        return self._retrieve(namespace, stream, start_id, end_time, order,
                              limit, configuration)
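
A minimal usage sketch of `retrieve` as documented above. The `backend` object, the namespace and stream names, the time window, and the contents of `configuration` are illustrative assumptions, not part of the example; `json` and the `ResultOrder` enum used above are assumed to be importable in this scope.

import json

def read_events(backend, configuration):
    # Sketch only: `backend` is assumed to expose the `retrieve` method shown
    # above; the namespace, stream, and time values are placeholders.
    events = backend.retrieve('kronos',      # namespace
                              'my_stream',   # stream
                              0,             # start_time (inclusive)
                              1000,          # end_time (inclusive)
                              None,          # start_id (fall back to start_time)
                              configuration,
                              order=ResultOrder.ASCENDING,
                              limit=100)
    # `retrieve` yields JSON-serialized event strings.
    return [json.loads(e) for e in events]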
Example #3
  def test_rollover(self):
    settings.storage.elasticsearch.rollover_size = 10
    settings.storage.elasticsearch.rollover_check_period_seconds = 2
    reload_router()

    indices = set()
    for i in xrange(50):
      indices.add(router.get_backend('elasticsearch')
                  .index_manager.get_index('kronos'))
      self.put('test_rollover', [{TIMESTAMP_FIELD: 0} for _ in xrange(5)])
      gevent.sleep(0.05)

    # Has index rolled over?
    self.assertTrue(len(indices) > 1)

    # No events were lost?
    events = self.get('test_rollover', 0, 1)
    self.assertEqual(len(events), 50 * 5)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    es = router.get_backend('elasticsearch').es
    index = router.get_backend('elasticsearch').index_manager.get_index(
      'kronos')
    indices.discard(index)
    for index in indices:
      self.assertTrue(es.count(index=index)['count'] >= 10)
Example #4
def delete(self, namespace, stream, start_time, end_time, start_id,
           configuration):
    if not start_id:
        start_id = uuid_from_kronos_time(start_time - 1,
                                         _type=UUIDType.HIGHEST)
    else:
        start_id = TimeUUID(start_id)
    if uuid_to_kronos_time(start_id) > end_time:
        return 0
    return self._delete(namespace, stream, start_id, end_time,
                        configuration)
Example #5
    def test_cmp(self):
        uuids = map(lambda s: TimeUUID(s), get_str_uuids(10000))
        random.shuffle(uuids)
        for _ in xrange(10000):
            a, b = random.choice(uuids), random.choice(uuids)
            self.assertEqual(cmp(a, b), py_cmp(a, b))

        for _ in xrange(1000):
            i = random.randint(1, 9999)
            self.assertTrue(uuids[i] == uuids[i])
            self.assertTrue(uuids[i] >= uuids[i])
            self.assertTrue(uuids[i] <= uuids[i])
            self.assertFalse(uuids[i] != uuids[i])
            self.assertFalse(uuids[i - 1] == uuids[i])
            self.assertTrue(uuids[i - 1] != uuids[i])
Example #6
def test_bytes_init(self):
    for uuid_str in get_str_uuids(10000):
        uu = UUID(uuid_str)
        tuu1 = TimeUUID(str(uu))
        tuu2 = TimeUUID(bytes=uu.bytes)
        self.assertEqual(tuu1, tuu2)
Example #7
def test_str(self):
    for uuid_str in get_str_uuids(20000):
        tuu = TimeUUID(uuid_str)
        self.assertEqual(uuid_str, str(tuu))
        self.assertEqual('TimeUUID(%s)' % uuid_str, repr(tuu))
Example #8
def test_bytes(self):
    for _id in get_str_uuids(10000):
        uu = UUID(_id)
        tuu = TimeUUID(_id)
        self.assertEqual(uu.bytes, tuu.bytes)
Example #9
def test_time(self):
    for uuid_str in get_str_uuids(10000):
        uu = UUID(uuid_str)
        tuu = TimeUUID(uuid_str)
        self.assertEqual(uu.time, tuu.time)
Example #10
    def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                  configuration):
        """
    Yield events from stream starting after the event with id `start_id` until
    and including events with timestamp `end_time`.
    """
        indices = self.index_manager.get_aliases(namespace,
                                                 uuid_to_kronos_time(start_id),
                                                 end_time)
        if not indices:
            return

        end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
        end_id.descending = start_id.descending = descending = (
            order == ResultOrder.DESCENDING)

        start_time = uuid_to_kronos_time(start_id)
        body_query = {
            'query': {
                'filtered': {
                    'query': {
                        'match_all': {}
                    },
                    'filter': {
                        'range': {
                            TIMESTAMP_FIELD: {
                                'gte': start_time,
                                'lte': end_time
                            }
                        }
                    }
                }
            }
        }
        order = 'desc' if descending else 'asc'
        sort_query = [
            '%s:%s' % (TIMESTAMP_FIELD, order),
            '%s:%s' % (ID_FIELD, order)
        ]

        last_id = end_id if descending else start_id
        scroll_id = None
        while True:
            size = max(
                min(limit, configuration['read_size']) / self.shards, 10)
            if scroll_id is None:
                res = self.es.search(index=indices,
                                     doc_type=stream,
                                     size=size,
                                     body=body_query,
                                     sort=sort_query,
                                     _source=True,
                                     scroll='1m',
                                     ignore=[400, 404],
                                     allow_no_indices=True,
                                     ignore_unavailable=True)
            else:
                res = self.es.scroll(scroll_id, scroll='1m')
            if '_scroll_id' not in res:
                break
            scroll_id = res['_scroll_id']
            hits = res.get('hits', {}).get('hits')
            if not hits:
                break

            for hit in hits:
                _id = TimeUUID(hit['_id'], descending=descending)
                if _id <= last_id:
                    continue
                last_id = _id
                event = hit['_source']
                del event[LOGSTASH_TIMESTAMP_FIELD]
                yield json.dumps(event)
                limit -= 1
                if limit == 0:
                    break

        if scroll_id is not None:
            self.es.clear_scroll(scroll_id)
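
The `descending` flag used in `_retrieve` above inverts `TimeUUID` ordering, which is why the same `_id <= last_id` bookkeeping works for both result orders. A minimal sketch of that behavior, mirroring Example #1; the UUIDs are freshly generated version-1 values and `TimeUUID` is assumed to be imported as in the examples.

import uuid

uuid_strs = [str(uuid.uuid1()) for _ in range(5)]
ascending = sorted(TimeUUID(s) for s in uuid_strs)
descending = sorted(TimeUUID(s, descending=True) for s in uuid_strs)

# Sorting with descending=True yields the exact reverse of the ascending
# order, while equality between the two flavors is unaffected (see Example #1).
assert [str(u) for u in ascending] == [str(u) for u in descending][::-1]
assert all(a == d for a, d in zip(ascending, descending[::-1]))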
Example #11
def deserialize(byts, protocol_version):
  return TimeUUID(bytes=byts)
Example #12
    def test_get(self):
        stream = 'TestKronosAPIs_test_get'
        event1 = [{'a': 1, TIMESTAMP_FIELD: 1}]
        event2 = [{'a': 2, TIMESTAMP_FIELD: 2}]
        event3 = [{'a': 3, TIMESTAMP_FIELD: 3}]
        event4 = [{'a': 4, TIMESTAMP_FIELD: 3}]

        # Test get from non-existent streams.
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 0)

        # Test get with intervals that have and don't have events.
        self.put(stream, event1)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 1)
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 0)

        # Test get with different time slices.
        self.put(stream, event2)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 1)
        event2_id = events[0][ID_FIELD]

        self.put(stream, event3)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 3)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

        # Test get for overlapping time events.
        self.put(stream, event4)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 4)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 3)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 3, 4)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

        # Test get for `start_time` and `end_time` inclusivity.
        events = self.get(stream, 1, 3)
        self.assertEqual(len(events), 4)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

        # Test get with `start_id`.
        events = self.get(stream, None, 4, start_id=event2_id)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        for event in events:
            self.assertEqual(event[TIMESTAMP_FIELD], 3)

        # Test get with `limit`.
        events = self.get(stream, 0, 4, limit=2)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
        self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
        events = self.get(stream, 0, 4, limit=3)
        self.assertEqual(len(events), 3)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
        self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
        self.assertEqual(events[2][TIMESTAMP_FIELD], 3)
        events = self.get(stream, 0, 4, limit=0)
        self.assertEqual(len(events), 0)

        # Test get with `order`.
        events = self.get(stream, 0, 4, order=ResultOrder.ASCENDING)
        self.assertEqual(len(events), 4)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 0, 4, order=ResultOrder.DESCENDING)
        self.assertEqual(len(events), 4)
        self.assertEqual(
            events,
            sorted(events,
                   key=lambda e: TimeUUID(e[ID_FIELD], descending=True)))

        # Test get with weird time ranges.
        # `start_time` == `end_time`
        self.assertEqual(len(self.get(stream, 3, 3)), 2)
        self.assertEqual(len(self.get(stream, 4, 4)), 0)

        # `start_time` and `end_time` in the future.
        now = epoch_time_to_kronos_time(time.time())
        self.assertEqual(
            len(
                self.get(stream, now + epoch_time_to_kronos_time(1000),
                         now + epoch_time_to_kronos_time(2000))), 0)
        # `start_time` > `end_time`
        self.assertEqual(len(self.get(stream, 10, 5)), 0)

        # `start_time` < 0 and `end_time` < 0
        self.assertEqual(len(self.get(stream, -2000, -1000)), 0)
Example #13
  def test_aliasing(self):
    settings.storage.elasticsearch.rollover_size = 10
    reload_router(kill_update_thread=True)
    index1 = router.get_backend('elasticsearch').index_manager.get_index(
      'kronos')

    time1 = datetime_to_kronos_time(datetime(2014, 1, 1, 0))
    time2 = datetime_to_kronos_time(datetime(2014, 1, 2, 0))
    time3 = datetime_to_kronos_time(datetime(2014, 1, 3, 0))
    self.put('test_aliasing', [{TIMESTAMP_FIELD: time1, 'i': i, 'j': 0}
                               for i in xrange(5)])
    self.put('test_aliasing', [{TIMESTAMP_FIELD: time2, 'i': i, 'j': 0}
                               for i in xrange(5)])

    router.get_backend('elasticsearch').index_manager.update()
    index2 = router.get_backend('elasticsearch').index_manager.get_index(
      'kronos')

    self.put('test_aliasing', [{TIMESTAMP_FIELD: time2, 'i': i, 'j': 1}
                               for i in xrange(5)])
    self.put('test_aliasing', [{TIMESTAMP_FIELD: time3, 'i': i, 'j': 1}
                               for i in xrange(5)])

    router.get_backend('elasticsearch').index_manager.update()
    index3 = router.get_backend('elasticsearch').index_manager.get_index(
      'kronos')

    # Has index rolled over each time?
    self.assertTrue(index1 != index2)
    self.assertTrue(index2 != index3)
    self.assertTrue(index1 != index3)

    events = self.get('test_aliasing', time1, time3)
    self.assertEqual(len(events), 20)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    events = self.get('test_aliasing', time1, time2)
    self.assertEqual(len(events), 15)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    events = self.get('test_aliasing', time2, time3)
    self.assertEqual(len(events), 15)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    es = router.get_backend('elasticsearch').es
    aliases = es.indices.get_aliases(index=[index1, index2, index3])
    self.assertEqual(len(aliases), 2)
    self.assertTrue(index1 in aliases)
    self.assertTrue(index2 in aliases)
    self.assertEqual(set(aliases[index1]['aliases']) &
                     set(aliases[index2]['aliases']),
                     set(['kronos_test:kronos:2014.01.02']))
    self.assertEqual(set(aliases[index1]['aliases']),
                     set(['kronos_test:kronos:2014.01.01',
                          'kronos_test:kronos:2014.01.02']))
    self.assertEqual(set(aliases[index2]['aliases']),
                     set(['kronos_test:kronos:2014.01.02',
                          'kronos_test:kronos:2014.01.03']))

    self.assertEqual(es.count(index=index1)['count'], 10)
    self.assertEqual(es.count(index=index2)['count'], 10)
    self.assertEqual(es.count(index=index3,
                              ignore_unavailable=True).get('count', 0), 0)