Example #1
  def test_clear(self):
    cache = StreamingCache(cache_dir=None)
    self.assertFalse(cache.exists('my_label'))
    cache.write([TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    self.assertTrue(cache.clear('my_label'))
    self.assertFalse(cache.exists('my_label'))
Example #2
  def test_size(self):
    cache = StreamingCache(cache_dir=None)
    cache.write([TestStreamFileRecord()], 'my_label')
    coder = cache.load_pcoder('my_label')

    # Add one because of the new-line character that is also written.
    size = len(coder.encode(TestStreamFileRecord().SerializeToString())) + 1
    self.assertEqual(cache.size('my_label'), size)
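
These snippets omit their imports. A minimal, self-contained round trip through the same API might look like the sketch below; the import paths are assumptions based on Apache Beam's interactive-runner source layout (they are not shown by the examples) and may differ across Beam versions.

# A hedged sketch of the write/read/clear lifecycle from Examples #1 and #2.
# Import paths are assumed from Apache Beam's source layout; adjust them to
# your Beam version.
from apache_beam.portability.api.beam_interactive_api_pb2 import (
    TestStreamFileRecord)
from apache_beam.runners.interactive.caching.streaming_cache import (
    StreamingCache)

cache = StreamingCache(cache_dir=None)  # None lets the cache pick a local dir
cache.write([TestStreamFileRecord()], 'my_label')
assert cache.exists('my_label')
reader, _ = cache.read('my_label')  # the second return value is unused here
events = list(reader)  # the recorded TestStreamPayload events
cache.clear('my_label')
assert not cache.exists('my_label')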
Example #3
    def test_exists(self):
        cache = StreamingCache(cache_dir=None)
        self.assertFalse(cache.exists('my_label'))
        cache.write([TestStreamFileRecord()], 'my_label')
        self.assertTrue(cache.exists('my_label'))

        # '' shouldn't be treated as a wildcard to match everything.
        self.assertFalse(cache.exists(''))
Example #4
  def test_clear(self):
    cache = StreamingCache(cache_dir=None)
    self.assertFalse(cache.exists('my_label'))
    cache.sink(['my_label'], is_capture=True)
    cache.write([TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    self.assertEqual(cache.capture_keys, set(['my_label']))
    self.assertTrue(cache.clear('my_label'))
    self.assertFalse(cache.exists('my_label'))
    self.assertFalse(cache.capture_keys)
Example #5
  def test_empty(self):
    CACHED_PCOLLECTION_KEY = repr(CacheKey('arbitrary_key', '', '', ''))

    cache = StreamingCache(cache_dir=None)
    self.assertFalse(cache.exists(CACHED_PCOLLECTION_KEY))
    cache.write([], CACHED_PCOLLECTION_KEY)
    reader, _ = cache.read(CACHED_PCOLLECTION_KEY)

    # Assert that an empty reader returns an empty list.
    self.assertFalse([e for e in reader])
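
Note that writing an empty list is legal: read() still returns a reader, and iterating it simply yields no events. Throughout these examples the second value returned by read() is ignored.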
Example #6
  def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
    ib.options.capture_size_limit = 100
    cache = StreamingCache(cache_dir=None)
    # Build a sink object to track the label as a capture in the test.
    cache.sink(['my_label'], is_capture=True)
    cache.write([TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    ie.current_env().set_cache_manager(cache)
    self.assertFalse(
        ie.current_env().options.capture_control.is_capture_size_limit_reached())
Example #7
def _setup_test_streaming_cache():
    cache_manager = StreamingCache(cache_dir=None)
    ie.current_env().set_cache_manager(cache_manager)
    builder = FileRecordsBuilder(tag=_TEST_CACHE_KEY)
    (builder
        .advance_watermark(watermark_secs=0)
        .advance_processing_time(5)
        .add_element(element='a', event_time_secs=1)
        .advance_watermark(watermark_secs=100)
        .advance_processing_time(10)) # yapf: disable
    cache_manager.write(builder.build(), _TEST_CACHE_KEY)
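
The helper above seeds the current interactive environment's cache with a scripted stream. A hypothetical companion (the function name is illustrative, not from the Beam test suite) could replay it with the same read() call used in the other examples:

# Hypothetical companion to _setup_test_streaming_cache(); cache_manager is
# the StreamingCache built there and _TEST_CACHE_KEY the module-level key it
# writes under.
def _replay_test_streaming_cache(cache_manager):
    # read() yields the recorded TestStreamPayload events in the order the
    # FileRecordsBuilder scripted them.
    reader, _ = cache_manager.read(_TEST_CACHE_KEY)
    return list(reader)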
Example #8
  def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
    ib.options.capture_size_limit = 100
    cache = StreamingCache(cache_dir=None)
    # Build a sink object to track the label as a capture in the test.
    cache.sink(['my_label'], is_capture=True)
    cache.write([beam_interactive_api_pb2.TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    ie.current_env().set_cache_manager(cache, 'dummy pipeline')

    limiter = capture_limiters.SizeLimiter(ib.options.capture_size_limit)
    self.assertFalse(limiter.is_triggered())
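
Examples #6 and #8 exercise the same behavior through two variants of the API: the older check goes through ie.current_env().options.capture_control, while the newer one constructs a capture_limiters.SizeLimiter with an explicit byte limit and queries it via is_triggered().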
Example #9
    def test_single_reader_with_processing_time_limiter(self):
        """Tests that we expect to see all the correctly emitted TestStreamPayloads.
    """
        CACHED_PCOLLECTION_KEY = repr(CacheKey('arbitrary_key', '', '', ''))

        values = (FileRecordsBuilder(tag=CACHED_PCOLLECTION_KEY)
                  .advance_processing_time(1e-6)
                  .add_element(element=0, event_time_secs=0)
                  .advance_processing_time(1)
                  .add_element(element=1, event_time_secs=1)
                  .advance_processing_time(1)
                  .add_element(element=2, event_time_secs=2)
                  .advance_processing_time(1)
                  .add_element(element=3, event_time_secs=2)
                  .advance_processing_time(1)
                  .add_element(element=4, event_time_secs=2)
                  .build()) # yapf: disable

        cache = StreamingCache(cache_dir=None)
        cache.write(values, CACHED_PCOLLECTION_KEY)

        reader, _ = cache.read(CACHED_PCOLLECTION_KEY,
                               limiters=[ProcessingTimeLimiter(2)])
        coder = coders.FastPrimitivesCoder()
        events = list(reader)

        # Units here are in microseconds.
        # Expect only the prefix of the original events whose cumulative
        # processing-time advance stays under the limiter's duration.
        expected = [
            TestStreamPayload.Event(
                processing_time_event=TestStreamPayload.Event.
                AdvanceProcessingTime(advance_duration=1)),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(0), timestamp=0)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
            TestStreamPayload.Event(
                processing_time_event=TestStreamPayload.Event.
                AdvanceProcessingTime(advance_duration=1 * 10**6)),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(1),
                            timestamp=1 * 10**6)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
        ]
        self.assertSequenceEqual(events, expected)
Example #10
    def test_single_reader(self):
        """Tests that we expect to see all the correctly emitted TestStreamPayloads.
    """
        CACHED_PCOLLECTION_KEY = 'arbitrary_key'

        values = (FileRecordsBuilder(tag=CACHED_PCOLLECTION_KEY)
                  .add_element(element=0, event_time_secs=0)
                  .advance_processing_time(1)
                  .add_element(element=1, event_time_secs=1)
                  .advance_processing_time(1)
                  .add_element(element=2, event_time_secs=2)
                  .build()) # yapf: disable

        cache = StreamingCache(cache_dir=None)
        cache.write(values, CACHED_PCOLLECTION_KEY)

        reader, _ = cache.read(CACHED_PCOLLECTION_KEY)
        coder = coders.FastPrimitivesCoder()
        events = list(reader)

        # Units here are in microseconds.
        expected = [
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(0), timestamp=0)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
            TestStreamPayload.Event(
                processing_time_event=TestStreamPayload.Event.
                AdvanceProcessingTime(advance_duration=1 * 10**6)),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(1),
                            timestamp=1 * 10**6)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
            TestStreamPayload.Event(
                processing_time_event=TestStreamPayload.Event.
                AdvanceProcessingTime(advance_duration=1 * 10**6)),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(2),
                            timestamp=2 * 10**6)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
        ]
        self.assertSequenceEqual(events, expected)
Example #11
  def test_always_default_coder_for_test_stream_records(self):
    CACHED_NUMBERS = repr(CacheKey('numbers', '', '', ''))
    numbers = (FileRecordsBuilder(CACHED_NUMBERS)
               .advance_processing_time(2)
               .add_element(element=1, event_time_secs=0)
               .advance_processing_time(1)
               .add_element(element=2, event_time_secs=0)
               .advance_processing_time(1)
               .add_element(element=2, event_time_secs=0)
               .build()) # yapf: disable
    cache = StreamingCache(cache_dir=None)
    cache.write(numbers, CACHED_NUMBERS)
    self.assertIs(
        type(cache.load_pcoder(CACHED_NUMBERS)), type(cache._default_pcoder))
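
That is, load_pcoder() hands back the cache's default pcoder for labels holding TestStream records rather than a per-label coder, which is exactly what the test name asserts.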
Example #12
  def test_capture_size_limit_reached_when_file_size_above_limit(self):
    ib.options.capture_size_limit = 1
    cache = StreamingCache(cache_dir=None)
    cache.sink(['my_label'], is_capture=True)
    cache.write([
        TestStreamFileRecord(
            recorded_event=TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coders.FastPrimitivesCoder().encode('a'),
                            timestamp=0)
                    ])))
    ], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    ie.current_env().set_cache_manager(cache)
    self.assertTrue(
        ie.current_env().options.capture_control.is_capture_size_limit_reached())
Example #13
    def test_capture_size_limit_reached_when_file_size_above_limit(self):
        ib.options.capture_size_limit = 1
        cache = StreamingCache(cache_dir=None)
        cache.sink(['my_label'], is_capture=True)
        cache.write([
            TestStreamFileRecord(recorded_event=TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coders.FastPrimitivesCoder().encode(
                            'a'),
                        timestamp=0)
                ])))
        ], 'my_label')
        self.assertTrue(cache.exists('my_label'))
        p = _build_an_empty_streaming_pipeline()
        ie.current_env().set_cache_manager(cache, p)

        limiter = capture_limiters.SizeLimiter(1)
        self.assertTrue(limiter.is_triggered())
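
As with Examples #6 and #8, Examples #12 and #13 are two variants of the same test: a single encoded element written under a one-byte limit must trip the limiter, whether the check goes through capture_control or through an explicit capture_limiters.SizeLimiter(1).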
Example #14
    def test_streaming_cache_uses_local_ib_cache_root(self):
        """
    Checks that StreamingCache._cache_dir is set to the
    cache_root set under Interactive Beam for a local directory
    and that the cached values are the same as the values of a
    cache using default settings.
    """
        CACHED_PCOLLECTION_KEY = repr(CacheKey('arbitrary_key', '', '', ''))
        values = (FileRecordsBuilder(CACHED_PCOLLECTION_KEY)
                      .advance_processing_time(1)
                      .advance_watermark(watermark_secs=0)
                      .add_element(element=1, event_time_secs=0)
                      .build()) # yapf: disable

        local_cache = StreamingCache(cache_dir=None)
        local_cache.write(values, CACHED_PCOLLECTION_KEY)
        reader_one, _ = local_cache.read(CACHED_PCOLLECTION_KEY)
        pcoll_list_one = list(reader_one)

        # Point the Interactive Beam cache root at a local directory.
        ib.options.cache_root = '/tmp/it-test/'
        cache_manager_with_ib_option = StreamingCache(
            cache_dir=ib.options.cache_root)

        self.assertEqual(ib.options.cache_root,
                         cache_manager_with_ib_option._cache_dir)

        cache_manager_with_ib_option.write(values, CACHED_PCOLLECTION_KEY)
        reader_two, _ = cache_manager_with_ib_option.read(
            CACHED_PCOLLECTION_KEY)
        pcoll_list_two = list(reader_two)

        self.assertEqual(pcoll_list_one, pcoll_list_two)

        # Reset Interactive Beam setting
        ib.options.cache_root = None
Example #15
  def test_multiple_readers(self):
    """Tests that the service advances the clock with multiple outputs.
    """

    CACHED_LETTERS = repr(CacheKey('letters', '', '', ''))
    CACHED_NUMBERS = repr(CacheKey('numbers', '', '', ''))
    CACHED_LATE = repr(CacheKey('late', '', '', ''))

    letters = (FileRecordsBuilder(CACHED_LETTERS)
               .advance_processing_time(1)
               .advance_watermark(watermark_secs=0)
               .add_element(element='a', event_time_secs=0)
               .advance_processing_time(10)
               .advance_watermark(watermark_secs=10)
               .add_element(element='b', event_time_secs=10)
               .build()) # yapf: disable

    numbers = (FileRecordsBuilder(CACHED_NUMBERS)
               .advance_processing_time(2)
               .add_element(element=1, event_time_secs=0)
               .advance_processing_time(1)
               .add_element(element=2, event_time_secs=0)
               .advance_processing_time(1)
               .add_element(element=2, event_time_secs=0)
               .build()) # yapf: disable

    late = (FileRecordsBuilder(CACHED_LATE)
            .advance_processing_time(101)
            .add_element(element='late', event_time_secs=0)
            .build()) # yapf: disable

    cache = StreamingCache(cache_dir=None)
    cache.write(letters, CACHED_LETTERS)
    cache.write(numbers, CACHED_NUMBERS)
    cache.write(late, CACHED_LATE)

    reader = cache.read_multiple([[CACHED_LETTERS], [CACHED_NUMBERS],
                                  [CACHED_LATE]])
    coder = coders.FastPrimitivesCoder()
    events = list(reader)

    # Units here are in microseconds.
    expected = [
        # Advances clock from 0 to 1
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=1 * 10**6)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=0, tag=CACHED_LETTERS)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('a'), timestamp=0)
                ],
                tag=CACHED_LETTERS)),

        # Advances clock from 1 to 2
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=1 * 10**6)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode(1), timestamp=0)
                ],
                tag=CACHED_NUMBERS)),

        # Advances clock from 2 to 3
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=1 * 10**6)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode(2), timestamp=0)
                ],
                tag=CACHED_NUMBERS)),

        # Advances clock from 3 to 4
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=1 * 10**6)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode(2), timestamp=0)
                ],
                tag=CACHED_NUMBERS)),

        # Advances clock from 4 to 11
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=7 * 10**6)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=10 * 10**6, tag=CACHED_LETTERS)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('b'), timestamp=10 * 10**6)
                ],
                tag=CACHED_LETTERS)),

        # Advances clock from 11 to 101
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=90 * 10**6)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('late'), timestamp=0)
                ],
                tag=CACHED_LATE)),
    ]

    self.assertSequenceEqual(events, expected)
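
read_multiple() merges the per-label files into a single stream ordered by processing time: each AdvanceProcessingTime event carries the delta from the previous merged event (1, 1, 1, 1, 7, then 90 seconds above, expressed in microseconds), while element and watermark events keep their originating label as the tag.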