Example #1
0
    def test_read_range_at_middle(self):
        """Read a sub-range from the middle of a single file.

        A range tracker covering positions 20 to 40 is expected to yield
        exactly the records at indices 4, 5 and 6 of the written data.
        """
        fbs = LineSource('dummy_pattern', validate=False)

        file_name, expected_data = write_data(10)
        # Fixture sanity check; a unittest assertion is not stripped under -O.
        self.assertEqual(10, len(expected_data))

        # The source spans offsets 0..60; presumably each of the 10 records
        # occupies 6 bytes -- confirm against write_data.
        source = SingleFileSource(fbs, file_name, 0, 10 * 6)
        range_tracker = source.get_range_tracker(20, 40)
        read_data = list(source.read(range_tracker))
        # Order-insensitive comparison. assertCountEqual is the Python 3 name
        # of assertItemsEqual, which unittest.TestCase no longer provides.
        self.assertCountEqual(expected_data[4:7], read_data)
Example #2
0
  def test_read_range_at_middle(self):
    """Reading tracker range 20..40 must yield records 4 through 6."""
    line_source = LineSource('dummy_pattern', validate=False)

    path, records = write_data(10)
    assert len(records) == 10

    # Source covers offsets 0..60 (10 * 6) of the freshly written file.
    single_file = SingleFileSource(line_source, path, 0, 10 * 6)
    tracker = single_file.get_range_tracker(20, 40)
    records_read = list(single_file.read(tracker))
    # Compared without regard to order.
    self.assertCountEqual(records[4:7], records_read)
Example #3
0
    def test_read_range_at_beginning(self):
        """Reading tracker range 0..20 must yield the first four records."""
        line_source = LineSource('dummy_pattern', validate=False)

        path, records = write_data(10)
        assert len(records) == 10

        # Source covers offsets 0..60 (10 * 6) of the freshly written file.
        single_file = SingleFileSource(line_source, path, 0, 10 * 6)
        tracker = single_file.get_range_tracker(0, 20)
        records_read = list(single_file.read(tracker))
        # Compared without regard to order.
        self.assertCountEqual(records[:4], records_read)
  def test_read_range_at_beginning(self):
    """Read a sub-range that starts at the beginning of a single file.

    A range tracker covering positions 0 to 20 is expected to yield exactly
    the first four records of the written data.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    file_name, expected_data = write_data(10)
    # Fixture sanity check; a unittest assertion is not stripped under -O.
    self.assertEqual(10, len(expected_data))

    # The source spans offsets 0..60; presumably each of the 10 records
    # occupies 6 bytes -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    range_tracker = source.get_range_tracker(0, 20)
    read_data = list(source.read(range_tracker))
    # Order-insensitive comparison. assertCountEqual is the Python 3 name of
    # assertItemsEqual, which unittest.TestCase no longer provides.
    self.assertCountEqual(expected_data[:4], read_data)
Example #5
0
  def test_estimates_size(self):
    """Check estimate_size() against the configured offset range.

    The estimate is expected to equal stop_offset - start_offset; the file
    name is a dummy.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    # Should simply return stop_offset - start_offset
    source = SingleFileSource(
        fbs, file_name='dummy_file', start_offset=0, stop_offset=100)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(100, source.estimate_size())

    source = SingleFileSource(fbs, file_name='dummy_file', start_offset=10,
                              stop_offset=100)
    self.assertEqual(90, source.estimate_size())
Example #6
0
  def test_estimates_size(self):
    """Verify that estimate_size() reflects the source's offset range.

    For both a zero and a non-zero start offset the expected estimate is
    stop_offset - start_offset. A dummy file name is used.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    # Should simply return stop_offset - start_offset
    source = SingleFileSource(
        fbs, file_name='dummy_file', start_offset=0, stop_offset=100)
    # The assertEquals alias is deprecated and gone in Python 3.12; use the
    # canonical assertEqual instead.
    self.assertEqual(100, source.estimate_size())

    source = SingleFileSource(fbs, file_name='dummy_file', start_offset=10,
                              stop_offset=100)
    self.assertEqual(90, source.estimate_size())
Example #7
0
    def test_produces_splits_desiredsize_large_than_size(self):
        """Split with a desired bundle size larger than the source.

        With a 60-offset source and desired_bundle_size=100, a single split
        covering positions 0..60 with weight 60 is expected, and reading it
        must return every written record.
        """
        fbs = LineSource('dummy_pattern', validate=False)

        file_name, expected_data = write_data(10)
        # Fixture sanity check; a unittest assertion is not stripped under -O.
        self.assertEqual(10, len(expected_data))
        source = SingleFileSource(fbs, file_name, 0, 10 * 6)
        splits = list(source.split(desired_bundle_size=100))
        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(1, len(splits))
        self.assertEqual(60, splits[0].weight)
        self.assertEqual(0, splits[0].start_position)
        self.assertEqual(60, splits[0].stop_position)

        # No explicit bounds are passed; the split's own source is read as is.
        range_tracker = splits[0].source.get_range_tracker(None, None)
        read_data = list(splits[0].source.read(range_tracker))
        # Order-insensitive comparison. assertCountEqual is the Python 3 name
        # of assertItemsEqual, which unittest.TestCase no longer provides.
        self.assertCountEqual(expected_data, read_data)
Example #8
0
  def test_produces_splits_desiredsize_large_than_size(self):
    """Split with a desired bundle size larger than the source.

    With a 60-offset source and desired_bundle_size=100, a single split
    covering positions 0..60 with weight 60 is expected, and reading it must
    return every written record.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    file_name, expected_data = write_data(10)
    # Fixture sanity check; a unittest assertion is not stripped under -O.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(source.split(desired_bundle_size=100))
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(1, len(splits))
    self.assertEqual(60, splits[0].weight)
    self.assertEqual(0, splits[0].start_position)
    self.assertEqual(60, splits[0].stop_position)

    # No explicit bounds are passed; the split's own source is read as is.
    range_tracker = splits[0].source.get_range_tracker(None, None)
    read_data = list(splits[0].source.read(range_tracker))
    self.assertCountEqual(expected_data, read_data)
Example #9
0
    def test_produces_splits_desiredsize_smaller_than_size(self):
        """Split with a desired bundle size smaller than the source.

        With a 60-offset source and desired_bundle_size=25, three splits are
        expected; reading all of them must return every written record.
        """
        fbs = LineSource('dummy_pattern', validate=False)

        file_name, expected_data = write_data(10)
        # Fixture sanity check; a unittest assertion is not stripped under -O.
        self.assertEqual(10, len(expected_data))
        source = SingleFileSource(fbs, file_name, 0, 10 * 6)
        splits = list(source.split(desired_bundle_size=25))
        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(3, len(splits))

        read_data = []
        for split in splits:
            # Separate name so the outer `source` is not shadowed.
            split_source = split.source
            range_tracker = split_source.get_range_tracker(
                split.start_position, split.stop_position)
            read_data.extend(split_source.read(range_tracker))
        # Order-insensitive comparison. assertCountEqual is the Python 3 name
        # of assertItemsEqual, which unittest.TestCase no longer provides.
        self.assertCountEqual(expected_data, read_data)
Example #10
0
  def test_produces_splits_desiredsize_smaller_than_size(self):
    """Split with a desired bundle size smaller than the source.

    With a 60-offset source and desired_bundle_size=25, three splits are
    expected; reading all of them must return every written record.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    file_name, expected_data = write_data(10)
    # Fixture sanity check; a unittest assertion is not stripped under -O.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(source.split(desired_bundle_size=25))
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
      # Separate name so the outer `source` is not shadowed.
      split_source = split.source
      range_tracker = split_source.get_range_tracker(split.start_position,
                                                     split.stop_position)
      read_data.extend(split_source.read(range_tracker))
    self.assertCountEqual(expected_data, read_data)
  def test_produce_split_with_start_and_end_positions(self):
    """Split a restricted offset range of the source.

    Splitting offsets 10..50 with desired_bundle_size=15 is expected to give
    three splits; reading all of them must return the records at indices 2
    through 8 of the written data.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    file_name, expected_data = write_data(10)
    # Fixture sanity check; a unittest assertion is not stripped under -O.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(source.split(desired_bundle_size=15, start_offset=10,
                               stop_offset=50))
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
      # Separate name so the outer `source` is not shadowed.
      split_source = split.source
      range_tracker = split_source.get_range_tracker(split.start_position,
                                                     split.stop_position)
      read_data.extend(split_source.read(range_tracker))
    # Order-insensitive comparison. assertCountEqual is the Python 3 name of
    # assertItemsEqual, which unittest.TestCase no longer provides.
    self.assertCountEqual(expected_data[2:9], read_data)
Example #12
0
  def test_produce_split_with_start_and_end_positions(self):
    """Split a restricted offset range of the source.

    Splitting offsets 10..50 with desired_bundle_size=15 is expected to give
    three splits; reading all of them must return the records at indices 2
    through 8 of the written data.
    """
    fbs = LineSource('dummy_pattern', validate=False)

    file_name, expected_data = write_data(10)
    # Fixture sanity check; a unittest assertion is not stripped under -O.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(source.split(desired_bundle_size=15, start_offset=10,
                               stop_offset=50))
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
      # Separate name so the outer `source` is not shadowed.
      split_source = split.source
      range_tracker = split_source.get_range_tracker(split.start_position,
                                                     split.stop_position)
      read_data.extend(split_source.read(range_tracker))
    self.assertCountEqual(expected_data[2:9], read_data)