Example #1
0
  def test_index(self):
    ''' Writes events into a stream and verifies that the index column
    family ends up containing every expected row key. '''
    stream_name = 'TestCassandraBackend_test_index'
    stream = self.namespace.get_stream(stream_name, self.width, self.shards)

    # Insert 100 events spread over the 20 distinct timestamps 0..19.
    for i in xrange(100):
      event = {TIMESTAMP_FIELD: epoch_time_to_kronos_time(i % 20)}
      self.put(stream_name, [event])
    self.assertEqual(
      len(self.get(stream_name, 0, epoch_time_to_kronos_time(20))), 100)

    # Group the shard numbers seen in the index by bucket start time.
    buckets = defaultdict(set)
    for desc in stream.get_overlapping_shards(
      0, epoch_time_to_kronos_time(20)):
      self.assertEqual(desc['width'], self.width)
      buckets[desc['start_time']].add(desc['shard'])

    # Every bucket must reference the complete set of shard numbers.
    expected_shards = set(xrange(self.shards))
    for shard_set in buckets.itervalues():
      self.assertEqual(len(shard_set), self.shards)
      self.assertEqual(shard_set, expected_shards)

    # The indexed bucket start times must cover all 20 rounded-down times.
    expected_times = set(round_down(epoch_time_to_kronos_time(t), self.width)
                         for t in xrange(20))
    self.assertEqual(set(buckets), expected_times)
Example #2
0
  def test_stream_sharding(self):
    ''' Writes events that land in a single time bucket and verifies that
    they are distributed across all of the shards. '''
    stream_name = 'TestCassandraBackend_test_stream_sharding'
    stream = self.namespace.get_stream(stream_name, self.width, self.shards)

    # With a small shard count, inserting 100 events into one bucket should
    # place at least one event in every shard.
    for _ in xrange(100):
      self.put(stream_name, [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(1)}])
    self.assertEqual(
      len(self.get(stream_name, 0, epoch_time_to_kronos_time(2))), 100)

    total = 0
    for shard_id in xrange(self.shards):
      shard = StreamShard(stream.namespace, stream_name, 0, self.width,
                          shard_id, False, MAX_LIMIT, 100)
      shard_events = list(shard.iterator(uuid_from_time(0),
                                         uuid_from_time(2)))
      self.assertTrue(len(shard_events) > 0)
      total += len(shard_events)
    # Together the shards must account for every event in [0, 2).
    self.assertEqual(total, 100)
Example #3
0
 def test_stream_sharding(self):
   ''' Checks that events falling into one time bucket end up spread
   across every shard of that bucket. '''
   stream_name = 'TestCassandraBackend_test_stream_sharding'
   stream = self.namespace.get_stream(stream_name, self.width, self.shards)

   # With only a handful of shards, 100 inserts into the same bucket
   # should land at least one event in each shard.
   for _ in xrange(100):
     self.put(stream_name, [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(1)}])
   fetched = self.get(stream_name, 0, epoch_time_to_kronos_time(2))
   self.assertEqual(len(fetched), 100)

   seen = 0
   for shard in xrange(self.shards):
     stream_shard = StreamShard(stream.namespace, stream_name, 0,
                                self.width, shard, False, MAX_LIMIT, 100)
     per_shard = list(stream_shard.iterator(uuid_from_time(0),
                                            uuid_from_time(2)))
     self.assertTrue(len(per_shard) > 0)
     seen += len(per_shard)
   # All shards combined must contain exactly the 100 inserted events.
   self.assertEqual(seen, 100)
Example #4
0
  def test_index(self):
    ''' Inserts events into a stream and checks that the index column
    family holds all of the expected row keys. '''
    stream_name = 'TestCassandraBackend_test_index'
    stream = self.namespace.get_stream(stream_name, self.width, self.shards)

    # 100 events across 20 distinct timestamps (0..19 seconds).
    for i in xrange(100):
      self.put(stream_name,
               [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(i % 20)}])
    retrieved = self.get(stream_name, 0, epoch_time_to_kronos_time(20))
    self.assertEqual(len(retrieved), 100)

    # Map each bucket start time to the set of shards indexed under it.
    time_shards = defaultdict(set)
    for desc in stream.get_overlapping_shards(
      0, epoch_time_to_kronos_time(20)):
      self.assertEqual(desc['width'], self.width)
      time_shards[desc['start_time']].add(desc['shard'])

    # Each bucket must reference the full set of shard numbers.
    for shard_ids in time_shards.itervalues():
      self.assertEqual(len(shard_ids), self.shards)
      self.assertEqual(shard_ids, set(xrange(self.shards)))

    # Bucket start times must match the rounded-down event timestamps.
    self.assertEqual(
      set(time_shards),
      set(round_down(epoch_time_to_kronos_time(t), self.width)
          for t in xrange(20)))
Example #5
0
    def test_overlapping_shards(self):
        """ Tests that changing bucket widths doesn't break reads.

        First inserts events into two contiguous buckets while the bucket
        width is 2 seconds, then changes the width to 4 seconds and inserts
        the same events again so they all land in one bucket. Finally checks
        that the first bucket holds 3x the number of events of the second
        bucket (30 + 60 vs. 30 + 0). """

        stream_name = "TestCassandraBackend_test_overlapping_shards"
        stream = self.namespace.get_stream(stream_name, self.width, self.shards)

        # 60 events over timestamps 0..3; with a 2s width they split evenly
        # between the buckets starting at 0 and at 2 (30 each).
        for i in xrange(60):
            self.put(stream_name, [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(i % 4)}])
        events = self.get(stream_name, 0, epoch_time_to_kronos_time(4))
        self.assertEqual(len(events), 60)

        # Change default width to be 4 seconds instead of 2.
        settings.storage.cassandra.timewidth_seconds = 4
        router.reload()

        # All of these events should now go into bucket with start time 0.
        for i in xrange(60):
            self.put(stream_name, [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(i % 4)}])
        events = self.get(stream_name, 0, epoch_time_to_kronos_time(4))
        self.assertEqual(len(events), 120)

        shard_to_events = defaultdict(int)
        for start_time in (0, 2):
            # Fetch events directly from each shard.
            for shard in xrange(self.shards):
                stream_shard = StreamShard(
                    stream.namespace,
                    stream_name,
                    epoch_time_to_kronos_time(start_time),
                    self.width,
                    shard,
                    False,
                    MAX_LIMIT,
                    100,
                )
                # NOTE(review): UUIDType.LOWEST presumably makes the start
                # bound inclusive — confirm against uuid_from_time.
                events = stream_shard.iterator(
                    uuid_from_time(start_time, UUIDType.LOWEST), uuid_from_time(start_time + 4)
                )
                shard_to_events[start_time] += len(list(events))
        self.assertEqual(len(shard_to_events), 2)
        self.assertEqual(shard_to_events[0], 90)  # 30 + 60.
        self.assertEqual(shard_to_events[2], 30)  # 30 + 0.
        self.assertEqual(sum(shard_to_events.itervalues()), 120)

        # Revert default width settings.
        settings.storage.cassandra.timewidth_seconds = 2
        router.reload()
Example #6
0
def _make_event(t):
    """Build a test event at epoch time t with three random string props."""
    event = {TIMESTAMP_FIELD: epoch_time_to_kronos_time(t)}
    for key in ('property1', 'property2', 'property3'):
        event[key] = str(uuid.uuid4())
    return event
Example #7
0
    def test_stream_splitting(self):
        """ Inserts events that fall into multiple time buckets and ensures
        that each bucket ends up with the right number of events and that
        every event in a bucket falls inside the bucket's time range. """

        stream_name = "TestCassandraBackend_test_stream_splitting"
        stream = self.namespace.get_stream(stream_name, self.width, self.shards)
        # Pin the bucket width to 2 seconds so the 5-bucket layout below holds.
        settings.storage.cassandra.timewidth_seconds = 2
        router.reload()

        # Each bucket interval has width of 2 seconds, so this should distribute
        # events in 5 buckets: [0, 2), [2, 4), [4, 6), [6, 8), [8, 10).
        for i in xrange(100):
            self.put(stream_name, [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(i % 10)}])
        events = self.get(stream_name, 0, epoch_time_to_kronos_time(10))
        self.assertEqual(len(events), 100)

        bucket_to_events = defaultdict(list)
        for start_time in (0, 2, 4, 6, 8):
            # Fetch events directly from each bucket.
            for shard in xrange(self.shards):
                stream_shard = StreamShard(
                    stream.namespace,
                    stream_name,
                    epoch_time_to_kronos_time(start_time),
                    self.width,
                    shard,
                    False,
                    MAX_LIMIT,
                    100,
                )
                # NOTE(review): UUIDType.LOWEST presumably makes the start
                # bound inclusive — confirm against uuid_from_time.
                events = stream_shard.iterator(
                    uuid_from_time(start_time, UUIDType.LOWEST), uuid_from_time(start_time + self.width_seconds)
                )
                # Events come back as stored blobs; unmarshal to plain dicts.
                bucket_to_events[start_time].extend(marshal.loads(event.json) for event in events)

        num_events = 0
        for start_time, events in bucket_to_events.iteritems():
            # Each bucket should have 20 events and they must fall in the bucket's
            # time range.
            self.assertEqual(len(events), 20)
            for event in events:
                time = kronos_time_to_epoch_time(event[TIMESTAMP_FIELD])
                self.assertTrue(time >= start_time)
                self.assertTrue(time < start_time + self.width_seconds)
            num_events += len(events)
        self.assertEqual(num_events, 100)
Example #8
0
  def test_overlapping_shards(self):
    ''' Tests that changing bucket widths doesn't break reads.

    First inserts events into two contiguous buckets while the bucket width
    is 2 seconds, then changes the width to 4 seconds and inserts the same
    events again so they all land in one bucket. Finally checks that the
    first bucket holds 3x the number of events of the second bucket
    (30 + 60 vs. 30 + 0). '''

    stream_name = 'TestCassandraBackend_test_overlapping_shards'
    stream = self.namespace.get_stream(stream_name, self.width, self.shards)

    # 60 events over timestamps 0..3; with a 2s width they split evenly
    # between the buckets starting at 0 and at 2 (30 each).
    for i in xrange(60):
      self.put(stream_name,
               [{TIMESTAMP_FIELD: epoch_time_to_kronos_time(i % 4)}])
    events = self.get(stream_name, 0, epoch_time_to_kronos_time(4))
    self.assertEqual(len(events), 60)

    # Change default width to be 4 seconds instead of 2.
    settings.storage.cassandra.timewidth_seconds = 4
    router.reload()

    # All of these events should now go into bucket with start time 0.
    for i in xrange(60):
      self.put(stream_name, [{TIMESTAMP_FIELD:
                              epoch_time_to_kronos_time(i % 4)}])
    events = self.get(stream_name, 0, epoch_time_to_kronos_time(4))
    self.assertEqual(len(events), 120)

    shard_to_events = defaultdict(int)
    for start_time in (0, 2):
      # Fetch events directly from each shard.
      for shard in xrange(self.shards):
        stream_shard = StreamShard(stream.namespace, stream_name,
                                   epoch_time_to_kronos_time(start_time),
                                   self.width, shard, False,
                                   MAX_LIMIT, 100)
        # NOTE(review): UUIDType.LOWEST presumably makes the start bound
        # inclusive — confirm against uuid_from_time.
        events = stream_shard.iterator(uuid_from_time(start_time,
                                                      UUIDType.LOWEST),
                                       uuid_from_time(start_time + 4))
        shard_to_events[start_time] += len(list(events))
    self.assertEqual(len(shard_to_events), 2)
    self.assertEqual(shard_to_events[0], 90)  # 30 + 60.
    self.assertEqual(shard_to_events[2], 30)  # 30 + 0.
    self.assertEqual(sum(shard_to_events.itervalues()), 120)

    # Revert default width settings.
    settings.storage.cassandra.timewidth_seconds = 2
    router.reload()
Example #9
0
def infer_schema(namespace, stream):
    """Infer a draft-04 JSON schema from up to 100 recent stream events."""
    end_time = epoch_time_to_kronos_time(time.time())
    backend, configuration = router.backend_to_retrieve(namespace, stream)
    # Sample the newest events first.
    events = backend.retrieve(namespace, stream, 0, end_time, None,
                              configuration, order=ResultOrder.DESCENDING,
                              limit=100)
    combined = NullType()
    for event in events:
        combined = combined.combine(get_schema_type(marshal.loads(event)))
    schema = combined.to_dict()
    schema["$schema"] = "http://json-schema.org/draft-04/schema"
    return schema
Example #10
0
  def test_stream_splitting(self):
    ''' Inserts events that fall into multiple time buckets and ensures that
    each time bucket has the right number of events at the end and all the
    events in each bucket fall into its time range. '''

    stream_name = 'TestCassandraBackend_test_stream_splitting'
    stream = self.namespace.get_stream(stream_name, self.width, self.shards)
    # Pin the bucket width to 2 seconds so the 5-bucket layout below holds.
    settings.storage.cassandra.timewidth_seconds = 2
    router.reload()

    # Each bucket interval has width of 2 seconds, so this should distribute
    # events in 5 buckets: [0, 2), [2, 4), [4, 6), [6, 8), [8, 10).
    for i in xrange(100):
      self.put(stream_name, [{TIMESTAMP_FIELD:
                              epoch_time_to_kronos_time(i % 10)}])
    events = self.get(stream_name, 0, epoch_time_to_kronos_time(10))
    self.assertEqual(len(events), 100)

    bucket_to_events = defaultdict(list)
    for start_time in (0, 2, 4, 6, 8):
      # Fetch events directly from each bucket.
      for shard in xrange(self.shards):
        stream_shard = StreamShard(stream.namespace, stream_name,
                                   epoch_time_to_kronos_time(start_time),
                                   self.width, shard, False,
                                   MAX_LIMIT, 100)
        # NOTE(review): UUIDType.LOWEST presumably makes the start bound
        # inclusive — confirm against uuid_from_time.
        events = stream_shard.iterator(
          uuid_from_time(start_time, UUIDType.LOWEST),
          uuid_from_time(start_time + self.width_seconds))
        # Events come back as stored blobs; unmarshal to plain dicts.
        bucket_to_events[start_time].extend(marshal.loads(event.json)
                                            for event in events)

    num_events = 0
    for start_time, events in bucket_to_events.iteritems():
      # Each bucket should have 20 events and they must fall in the bucket's
      # time range.
      self.assertEqual(len(events), 20)
      for event in events:
        time = kronos_time_to_epoch_time(event[TIMESTAMP_FIELD])
        self.assertTrue(time >= start_time)
        self.assertTrue(time < start_time + self.width_seconds)
      num_events += len(events)
    self.assertEqual(num_events, 100)
Example #11
0
def validate_event_and_assign_id(event):
  """
  Ensure that the event has a valid time. Assign a random UUID based on the
  event time. Mutates `event` in place and returns (id, event).
  """
  event_time = event.get(TIMESTAMP_FIELD)
  if event_time is None:
    # No timestamp supplied: default to the current time.
    event_time = epoch_time_to_kronos_time(time.time())
    event[TIMESTAMP_FIELD] = event_time
  elif type(event_time) not in (int, long):
    raise InvalidEventTime(event_time)

  # Derive a uuid1-like id whose time component matches the event time and
  # whose remaining bytes are random.
  _id = uuid_from_kronos_time(event_time)
  event[ID_FIELD] = str(_id)
  return _id, event
Example #12
0
def infer_schema(namespace, stream):
    """Build a draft-04 JSON schema from a sample of recent stream events."""
    now = epoch_time_to_kronos_time(time.time())
    backend, configuration = router.backend_to_retrieve(namespace, stream)
    # Pull the 100 newest events from the backend that owns this stream.
    events = backend.retrieve(namespace, stream, 0, now, None, configuration,
                              order=ResultOrder.DESCENDING, limit=100)
    schema_type = NullType()
    for event in events:
        event_type = get_schema_type(marshal.loads(event))
        schema_type = schema_type.combine(event_type)
    schema = schema_type.to_dict()
    schema['$schema'] = 'http://json-schema.org/draft-04/schema'
    return schema
Example #13
0
def validate_event_and_assign_id(event):
    """
    Ensure that the event carries a valid timestamp and stamp it with a
    random time-based UUID. Mutates `event` in place; returns (id, event).
    """
    event_time = event.get(TIMESTAMP_FIELD)
    if event_time is None:
        # Missing timestamp: default to the current time.
        event_time = epoch_time_to_kronos_time(time.time())
        event[TIMESTAMP_FIELD] = event_time
    elif type(event_time) not in (int, long):
        raise InvalidEventTime(event_time)

    # uuid1-like id: time bytes come from the event, the rest are random.
    _id = uuid_from_kronos_time(event_time)
    event[ID_FIELD] = str(_id)
    return _id, event
Example #14
0
def _make_event(t):
  """Return a test event at epoch time t carrying three random properties."""
  event = dict(('property%d' % i, str(uuid.uuid4())) for i in (1, 2, 3))
  event[TIMESTAMP_FIELD] = epoch_time_to_kronos_time(t)
  return event
Example #15
0
    def test_get(self):
        """ End-to-end coverage of the get API: non-existent streams,
        time-slice boundaries, events with overlapping timestamps,
        `start_id`, `limit`, result `order`, and degenerate/out-of-range
        time intervals. Results are always expected back in TimeUUID
        order. """
        stream = 'TestKronosAPIs_test_get'
        event1 = [{'a': 1, TIMESTAMP_FIELD: 1}]
        event2 = [{'a': 2, TIMESTAMP_FIELD: 2}]
        event3 = [{'a': 3, TIMESTAMP_FIELD: 3}]
        # event4 deliberately shares event3's timestamp.
        event4 = [{'a': 4, TIMESTAMP_FIELD: 3}]

        # Test get from non-existent streams.
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 0)

        # Test get with intervals that have and don't have events.
        self.put(stream, event1)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 1)
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 0)

        # Test get with different time slices.
        self.put(stream, event2)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 1)
        event2_id = events[0][ID_FIELD]

        self.put(stream, event3)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 3)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

        # Test get for overlapping time events.
        self.put(stream, event4)
        events = self.get(stream, 0, 4)
        self.assertEqual(len(events), 4)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 2, 4)
        self.assertEqual(len(events), 3)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 3, 4)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

        # Test get for `start_time` and `end_time` inclusivity.
        events = self.get(stream, 1, 3)
        self.assertEqual(len(events), 4)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

        # Test get with `start_id`.
        events = self.get(stream, None, 4, start_id=event2_id)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        for event in events:
            self.assertEqual(event[TIMESTAMP_FIELD], 3)

        # Test get with `limit`.
        events = self.get(stream, 0, 4, limit=2)
        self.assertEqual(len(events), 2)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
        self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
        events = self.get(stream, 0, 4, limit=3)
        self.assertEqual(len(events), 3)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
        self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
        self.assertEqual(events[2][TIMESTAMP_FIELD], 3)
        events = self.get(stream, 0, 4, limit=0)
        self.assertEqual(len(events), 0)

        # Test get with `order`.
        events = self.get(stream, 0, 4, order=ResultOrder.ASCENDING)
        self.assertEqual(len(events), 4)
        self.assertEqual(events,
                         sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
        events = self.get(stream, 0, 4, order=ResultOrder.DESCENDING)
        self.assertEqual(len(events), 4)
        self.assertEqual(
            events,
            sorted(events,
                   key=lambda e: TimeUUID(e[ID_FIELD], descending=True)))

        # Test get with weird time ranges.
        # `start_time` == `end_time`
        self.assertEqual(len(self.get(stream, 3, 3)), 2)
        self.assertEqual(len(self.get(stream, 4, 4)), 0)

        # `start_time` and `end_time` in the future.
        now = epoch_time_to_kronos_time(time.time())
        self.assertEqual(
            len(
                self.get(stream, now + epoch_time_to_kronos_time(1000),
                         now + epoch_time_to_kronos_time(2000))), 0)
        # `start_time` > `end_time`
        self.assertEqual(len(self.get(stream, 10, 5)), 0)

        # `start_time` < 0 and `end_time` < 0
        self.assertEqual(len(self.get(stream, -2000, -1000)), 0)
Example #16
0
 def setUp(self):
   ''' Caches the Cassandra namespace and sharding settings on the test. '''
   super(TestCassandraBackend, self).setUp()
   self.namespace = router.get_backend('cassandra').namespaces['kronos']
   cassandra_settings = settings.storage['cassandra']
   self.shards = cassandra_settings['shards_per_bucket']
   self.width_seconds = cassandra_settings['timewidth_seconds']
   self.width = epoch_time_to_kronos_time(self.width_seconds)
Example #17
0
 def get_stream(self, namespace, stream, configuration):
   ''' Looks up `stream` in `namespace`, sized per `configuration`. '''
   ns = self.namespaces[namespace]
   width = epoch_time_to_kronos_time(configuration['timewidth_seconds'])
   shards = int(configuration['shards_per_bucket'])
   return ns.get_stream(stream, width, shards)
Example #18
0
  def test_get(self):
    ''' End-to-end coverage of the get API: non-existent streams, time-slice
    boundaries, events with overlapping timestamps, `start_id`, `limit`,
    result `order`, and degenerate/out-of-range time intervals. Results are
    always expected back in TimeUUID order. '''
    stream = 'TestKronosAPIs_test_get'
    event1 = [{'a': 1, TIMESTAMP_FIELD: 1}]
    event2 = [{'a': 2, TIMESTAMP_FIELD: 2}]
    event3 = [{'a': 3, TIMESTAMP_FIELD: 3}]
    # event4 deliberately shares event3's timestamp.
    event4 = [{'a': 4, TIMESTAMP_FIELD: 3}]

    # Test get from non-existent streams.
    events = self.get(stream, 0, 4)
    self.assertEqual(len(events), 0)

    # Test get with intervals that have and don't have events.
    self.put(stream, event1)
    events = self.get(stream, 0, 4)
    self.assertEqual(len(events), 1)
    events = self.get(stream, 2, 4)
    self.assertEqual(len(events), 0)

    # Test get with different time slices.
    self.put(stream, event2)
    events = self.get(stream, 0, 4)
    self.assertEqual(len(events), 2)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    events = self.get(stream, 2, 4)
    self.assertEqual(len(events), 1)
    event2_id = events[0][ID_FIELD]

    self.put(stream, event3)
    events = self.get(stream, 0, 4)
    self.assertEqual(len(events), 3)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    events = self.get(stream, 2, 4)
    self.assertEqual(len(events), 2)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    # Test get for overlapping time events.
    self.put(stream, event4)
    events = self.get(stream, 0, 4)
    self.assertEqual(len(events), 4)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    events = self.get(stream, 2, 4)
    self.assertEqual(len(events), 3)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    events = self.get(stream, 3, 4)
    self.assertEqual(len(events), 2)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    # Test get for `start_time` and `end_time` inclusivity.
    events = self.get(stream, 1, 3)
    self.assertEqual(len(events), 4)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))

    # Test get with `start_id`.
    events = self.get(stream, None, 4, start_id=event2_id)
    self.assertEqual(len(events), 2)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    for event in events:
      self.assertEqual(event[TIMESTAMP_FIELD], 3)

    # Test get with `limit`.
    events = self.get(stream, 0, 4, limit=2)
    self.assertEqual(len(events), 2)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
    self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
    events = self.get(stream, 0, 4, limit=3)
    self.assertEqual(len(events), 3)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
    self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
    self.assertEqual(events[2][TIMESTAMP_FIELD], 3)
    events = self.get(stream, 0, 4, limit=0)
    self.assertEqual(len(events), 0)

    # Test get with `order`.
    events = self.get(stream, 0, 4, order=ResultOrder.ASCENDING)
    self.assertEqual(len(events), 4)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD])))
    events = self.get(stream, 0, 4, order=ResultOrder.DESCENDING)
    self.assertEqual(len(events), 4)
    self.assertEqual(events, sorted(events,
                                    key=lambda e: TimeUUID(e[ID_FIELD],
                                                           descending=True)))

    # Test get with weird time ranges.
    # `start_time` == `end_time`
    self.assertEqual(len(self.get(stream, 3, 3)), 2)
    self.assertEqual(len(self.get(stream, 4, 4)), 0)

    # `start_time` and `end_time` in the future.
    now = epoch_time_to_kronos_time(time.time())
    self.assertEqual(len(self.get(stream,
                                  now + epoch_time_to_kronos_time(1000),
                                  now + epoch_time_to_kronos_time(2000))), 0)
    # `start_time` > `end_time`
    self.assertEqual(len(self.get(stream, 10, 5)), 0)

    # `start_time` < 0 and `end_time` < 0
    self.assertEqual(len(self.get(stream, -2000, -1000)), 0)
Example #19
0
def read(stream, n, start=0, end=1000):
    """Fetch up to n events from stream between start and end (epoch
    seconds) and discard them."""
    start_time = epoch_time_to_kronos_time(start)
    end_time = epoch_time_to_kronos_time(end)
    for _ in kronos.get(stream, start_time, end_time, limit=n):
        pass
Example #20
0
def read(stream, n, start=0, end=1000):
  """Consume up to n events from stream between start and end epoch
  seconds, discarding each one."""
  events = kronos.get(stream,
                      epoch_time_to_kronos_time(start),
                      epoch_time_to_kronos_time(end),
                      limit=n)
  for _ in events:
    pass
Example #21
0
 def setUp(self):
     """ Captures the Cassandra test namespace plus shard and bucket-width
     settings as attributes for the tests. """
     super(TestCassandraBackend, self).setUp()
     backend = router.get_backend("cassandra")
     self.namespace = backend.namespaces["kronos"]
     self.shards = settings.storage["cassandra"]["shards_per_bucket"]
     self.width_seconds = settings.storage["cassandra"]["timewidth_seconds"]
     self.width = epoch_time_to_kronos_time(self.width_seconds)
Example #22
0
def uuid_from_time(time, uuid_type=UUIDType.RANDOM):
  """Convert an epoch time to a UUID of the requested type."""
  kronos_time = epoch_time_to_kronos_time(time)
  return uuid_from_kronos_time(kronos_time, uuid_type)
Example #23
0
class CassandraStorage(BaseStorage):
  """
  Kronos storage backend backed by a Cassandra cluster. Each Kronos
  namespace gets its own keyspace (named '<keyspace_prefix>_<namespace>'),
  and events within a stream are bucketed by time width and sharded
  according to the validated settings below.
  """

  # Validation callables applied to each configuration value before the
  # storage is constructed.
  SETTINGS_VALIDATORS = {
    'timewidth_seconds':
      lambda x: (is_pos_int(x) and  # noqa
                 epoch_time_to_kronos_time(int(x)) <= Stream.MAX_WIDTH),
    'shards_per_bucket': is_pos_int,
    'hosts': is_list,
    'keyspace_prefix': is_non_empty_str,
    'replication_factor': is_pos_int,
    'read_size': is_pos_int
  }

  def __init__(self, name, namespaces, **settings):
    """
    Check that settings contains all of the required parameters in the right
    format, then setup a connection to the specified Cassandra instance.
    """
    super(CassandraStorage, self).__init__(name, namespaces, **settings)
    self.namespaces = {}
    self.setup_cassandra(namespaces)

  def setup_cassandra(self, namespaces):
    """
    Set up a connection to the specified Cassandra cluster and create the
    specified keyspaces if they don't exist.
    """
    connections_to_shutdown = []
    self.cluster = Cluster(self.hosts)

    for namespace_name in namespaces:
      # One keyspace per namespace: '<keyspace_prefix>_<namespace>'.
      keyspace = '%s_%s' % (self.keyspace_prefix, namespace_name)
      namespace = Namespace(self.cluster, keyspace,
                            self.replication_factor, self.read_size)
      connections_to_shutdown.append(namespace.session)
      self.namespaces[namespace_name] = namespace

    # Shutdown Cluster instance after shutting down all Sessions.
    connections_to_shutdown.append(self.cluster)

    # Shut down all connections to Cassandra before the Python interpreter
    # exits.
    atexit.register(lambda: map(lambda c: c.shutdown(),
                                connections_to_shutdown))

  def is_alive(self):
    """
    Is our connection to Cassandra alive?
    """
    return not bool(self.cluster.is_shutdown)

  def get_stream(self, namespace, stream, configuration):
    # Resolve (namespace, stream) into a Stream object sized by the
    # configured bucket width and shard count.
    namespace = self.namespaces[namespace]
    width = epoch_time_to_kronos_time(configuration['timewidth_seconds'])
    return namespace.get_stream(stream, width,
                                int(configuration['shards_per_bucket']))

  def _insert(self, namespace, stream, events, configuration):
    """
    Store the specified events for the named stream.
    `stream` : The name of a stream.
    `events` : A list of events to store for the named stream.
    `configuration` : A dictionary of settings to override any default settings,
                      such as number of shards or width of a time interval.
    """
    stream = self.get_stream(namespace, stream, configuration)
    stream.insert(events)

  def _delete(self, namespace, stream, start_id, end_time, configuration):
    """
    Delete events for `stream` between `start_id` and `end_time`.
    `stream` : The stream to delete events for.
    `start_id` : Delete events with id > `start_id`.
    `end_time` : Delete events ending <= `end_time`.
    `configuration` : A dictionary of settings to override any default
                      settings, such as number of shards or width of a
                      time interval.
    """
    stream = self.get_stream(namespace, stream, configuration)
    # UUIDType.HIGHEST presumably makes the end bound cover every id at
    # `end_time` — confirm against uuid_from_kronos_time.
    return stream.delete(start_id,
                         uuid_from_kronos_time(end_time,
                                               _type=UUIDType.HIGHEST))

  def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
                configuration):
    """
    Retrieve events for `stream` between `start_id` and `end_time`.
    `stream` : The stream to return events for.
    `start_id` : Return events with id > `start_id`.
    `end_time` : Return events ending <= `end_time`.
    `order` : Whether to return the results in ResultOrder.ASCENDING
              or ResultOrder.DESCENDING time-order.
    `configuration` : A dictionary of settings to override any default
                      settings, such as number of shards or width of a
                      time interval.

    Generator yielding each event's JSON blob.
    """
    stream = self.get_stream(namespace, stream, configuration)
    events = stream.iterator(start_id,
                             uuid_from_kronos_time(end_time,
                                                   _type=UUIDType.HIGHEST),
                             order == ResultOrder.DESCENDING, limit)
    events = events.__iter__()
    # NOTE: `events.next()` raising StopIteration terminates this generator
    # (Python 2 semantics; under PEP 479 / Python 3.7+ this would raise
    # RuntimeError instead). An empty stream therefore yields nothing.
    event = events.next()
    # If first event's ID is equal to `start_id`, skip it.
    if event.id != start_id:
      yield event.json
    while True:
      yield events.next().json

  def _streams(self, namespace):
    # Yield the name of every stream stored under `namespace`.
    for stream_name in self.namespaces[namespace].list_streams():
      yield stream_name

  def _clear(self):
    # Drop each namespace's data, then re-establish its session.
    for namespace in self.namespaces.itervalues():
      namespace.drop()
      namespace.create_session()
Example #24
0
 def setUp(self):
   ''' Stores the test namespace plus shard/width settings as attributes. '''
   super(TestCassandraBackend, self).setUp()
   backend = router.get_backend('cassandra')
   self.namespace = backend.namespaces['kronos']
   storage_settings = settings.storage['cassandra']
   self.shards = storage_settings['shards_per_bucket']
   self.width_seconds = storage_settings['timewidth_seconds']
   self.width = epoch_time_to_kronos_time(self.width_seconds)
Example #25
0
 def get_stream(self, namespace, stream, configuration):
   ''' Returns the stream object sized by the configured width/shards. '''
   width = epoch_time_to_kronos_time(configuration['timewidth_seconds'])
   return self.namespaces[namespace].get_stream(
     stream, width, int(configuration['shards_per_bucket']))
Example #26
0
def uuid_from_time(time, uuid_type=UUIDType.RANDOM):
  """Build a UUID whose time component encodes the given epoch time."""
  return uuid_from_kronos_time(epoch_time_to_kronos_time(time),
                               uuid_type)