Esempio n. 1
0
 def test_read_all_many_file_patterns(self):
   """Read several file patterns through ReadAllFromText in one pipeline.

   write_pattern is called once per list of counts; a sanity assertion
   checks that each call returns as many records as the sum of its counts.
   The pipeline output is then compared, via equal_to, against the
   concatenation of the records returned by all three calls.
   """
   # (counts handed to write_pattern, number of records expected back)
   pattern_specs = (
       ([5, 3, 12, 8, 8, 4], 40),
       ([3, 7, 9], 19),
       ([11, 20, 5, 5], 41),
   )
   patterns = []
   expected_data = []
   for line_counts, expected_total in pattern_specs:
     pattern, records = write_pattern(line_counts)
     assert len(records) == expected_total
     patterns.append(pattern)
     expected_data.extend(records)

   pipeline = TestPipeline()
   # Stage 1 emits the patterns themselves; stage 2 expands and reads them.
   file_patterns = pipeline | 'Create' >> Create(patterns)
   pcoll = file_patterns | 'ReadAll' >> ReadAllFromText()
   assert_that(pcoll, equal_to(expected_data))
   pipeline.run()
Esempio n. 2
0
 def test_read_all_many_file_patterns(self):
   """Read several file patterns through ReadAllFromText in one pipeline.

   write_pattern is called once per list of counts; a sanity assertion
   checks that each call returns as many records as the sum of its counts.
   The pipeline output is then compared, via equal_to, against the
   concatenation of the records returned by all three calls.
   """
   # (counts handed to write_pattern, number of records expected back)
   pattern_specs = (
       ([5, 3, 12, 8, 8, 4], 40),
       ([3, 7, 9], 19),
       ([11, 20, 5, 5], 41),
   )
   patterns = []
   expected_data = []
   for line_counts, expected_total in pattern_specs:
     pattern, records = write_pattern(line_counts)
     assert len(records) == expected_total
     patterns.append(pattern)
     expected_data.extend(records)

   pipeline = TestPipeline()
   # Stage 1 emits the patterns themselves; stage 2 expands and reads them.
   file_patterns = pipeline | 'Create' >> Create(patterns)
   pcoll = file_patterns | 'ReadAll' >> ReadAllFromText()
   assert_that(pcoll, equal_to(expected_data))
   pipeline.run()
Esempio n. 3
0
 def test_read_from_text_file_pattern(self):
   """Read a file pattern with ReadFromText and verify the records.

   write_pattern is called with six counts; the sanity assertion confirms
   it returned 40 records (the sum of those counts) before the pipeline
   is built and run.
   """
   line_counts = [5, 3, 12, 8, 8, 4]
   file_pattern, written_lines = write_pattern(line_counts)
   assert len(written_lines) == 40

   pipeline = TestPipeline()
   read_transform = 'Read' >> ReadFromText(file_pattern)
   lines_read = pipeline | read_transform
   assert_that(lines_read, equal_to(written_lines))
   pipeline.run()
Esempio n. 4
0
 def test_read_from_text_file_pattern(self):
   """Read a file pattern with ReadFromText and verify the records.

   write_pattern is called with six counts; the sanity assertion confirms
   it returned 40 records (the sum of those counts) before the pipeline
   is built and run.
   """
   line_counts = [5, 3, 12, 8, 8, 4]
   file_pattern, written_lines = write_pattern(line_counts)
   assert len(written_lines) == 40

   pipeline = TestPipeline()
   read_transform = 'Read' >> ReadFromText(file_pattern)
   lines_read = pipeline | read_transform
   assert_that(lines_read, equal_to(written_lines))
   pipeline.run()
Esempio n. 5
0
 def test_read_all_file_pattern(self):
   """Read a single file pattern through Create followed by ReadAllFromText.

   The pattern returned by write_pattern is fed into the pipeline as the
   only element of a Create; the records read back are compared, via
   equal_to, with the data write_pattern returned.
   """
   file_pattern, written_lines = write_pattern([5, 3, 12, 8, 8, 4])
   assert len(written_lines) == 40

   pipeline = TestPipeline()
   patterns = pipeline | 'Create' >> Create([file_pattern])
   lines_read = patterns | 'ReadAll' >> ReadAllFromText()
   assert_that(lines_read, equal_to(written_lines))
   pipeline.run()
Esempio n. 6
0
 def test_read_all_file_pattern(self):
   """Read a single file pattern through Create followed by ReadAllFromText.

   The pattern returned by write_pattern is fed into the pipeline as the
   only element of a Create; the records read back are compared, via
   equal_to, with the data write_pattern returned.
   """
   file_pattern, written_lines = write_pattern([5, 3, 12, 8, 8, 4])
   assert len(written_lines) == 40

   pipeline = TestPipeline()
   patterns = pipeline | 'Create' >> Create([file_pattern])
   lines_read = patterns | 'ReadAll' >> ReadAllFromText()
   assert_that(lines_read, equal_to(written_lines))
   pipeline.run()
Esempio n. 7
0
 def test_read_file_pattern(self):
   """Run the shared read check over a pattern built from six counts.

   Every count passed to write_pattern is a multiple of
   TextSourceTest.DEFAULT_NUM_RECORDS; the multipliers sum to 40, which the
   sanity assertion verifies before delegating to self._run_read_test.
   """
   num_records = TextSourceTest.DEFAULT_NUM_RECORDS
   line_counts = [num_records * factor for factor in (5, 3, 12, 8, 8, 4)]
   pattern, expected_data = write_pattern(line_counts)
   assert len(expected_data) == num_records * 40
   self._run_read_test(pattern, expected_data)
Esempio n. 8
0
 def test_read_file_pattern(self):
   """Run the shared read check over a pattern built from six counts.

   Every count passed to write_pattern is a multiple of
   TextSourceTest.DEFAULT_NUM_RECORDS; the multipliers sum to 40, which the
   sanity assertion verifies before delegating to self._run_read_test.
   """
   num_records = TextSourceTest.DEFAULT_NUM_RECORDS
   line_counts = [num_records * factor for factor in (5, 3, 12, 8, 8, 4)]
   pattern, expected_data = write_pattern(line_counts)
   assert len(expected_data) == num_records * 40
   self._run_read_test(pattern, expected_data)
Esempio n. 9
0
  def test_read_skip_header_pattern_insufficient_lines(self):
    """Skip headers when one count is smaller than the number to skip.

    The expected records come from self._remove_lines applied to the data
    returned by write_pattern (presumably dropping the skipped header lines
    per file -- confirm against the helper). They are compared, ignoring
    order, with what self._read_skip_header_lines returns.
    """
    skip_header_lines = 4
    # The count of 3 is below skip_header_lines, so that entry has fewer
    # lines than we want to skip.
    line_counts = [5, 3, 12, 8, 8, 4]
    pattern, written = write_pattern(line_counts)

    expected = self._remove_lines(written, line_counts, skip_header_lines)
    actual = self._read_skip_header_lines(pattern, skip_header_lines)
    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)
Esempio n. 10
0
 def test_read_file_pattern_with_empty_files(self):
   """Run the shared read check with write_pattern called with no_data=True.

   The sanity assertions confirm that the returned data still has
   40 * TextSourceTest.DEFAULT_NUM_RECORDS entries and that its first entry
   is falsy (presumably an empty line -- confirm against write_pattern).
   """
   num_records = TextSourceTest.DEFAULT_NUM_RECORDS
   line_counts = [factor * num_records for factor in (5, 3, 12, 8, 8, 4)]
   pattern, expected_data = write_pattern(line_counts, no_data=True)
   assert len(expected_data) == 40 * num_records
   assert not expected_data[0]
   self._run_read_test(pattern, expected_data)
Esempio n. 11
0
  def test_read_skip_header_pattern_insufficient_lines(self):
    """Skip headers when one count is smaller than the number to skip.

    The expected records come from self._remove_lines applied to the data
    returned by write_pattern (presumably dropping the skipped header lines
    per file -- confirm against the helper). They are compared, ignoring
    order, with what self._read_skip_header_lines returns.
    """
    skip_header_lines = 4
    # The count of 3 is below skip_header_lines, so that entry has fewer
    # lines than we want to skip.
    line_counts = [5, 3, 12, 8, 8, 4]
    pattern, written = write_pattern(line_counts)

    expected = self._remove_lines(written, line_counts, skip_header_lines)
    actual = self._read_skip_header_lines(pattern, skip_header_lines)
    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)
Esempio n. 12
0
 def test_read_file_pattern_with_empty_files(self):
   """Run the shared read check with write_pattern called with no_data=True.

   The sanity assertions confirm that the returned data still has
   40 * TextSourceTest.DEFAULT_NUM_RECORDS entries and that its first entry
   is falsy (presumably an empty line -- confirm against write_pattern).
   """
   num_records = TextSourceTest.DEFAULT_NUM_RECORDS
   line_counts = [factor * num_records for factor in (5, 3, 12, 8, 8, 4)]
   pattern, expected_data = write_pattern(line_counts, no_data=True)
   assert len(expected_data) == 40 * num_records
   assert not expected_data[0]
   self._run_read_test(pattern, expected_data)
Esempio n. 13
0
  def test_read_skip_header_pattern(self):
    """Skip two header lines while reading a pattern built from six counts.

    Every count is a multiple of TextSourceTest.DEFAULT_NUM_RECORDS. The
    expected records come from self._remove_lines applied to the data
    returned by write_pattern (presumably dropping the skipped header lines
    per file -- confirm against the helper). They are compared, ignoring
    order, with what self._read_skip_header_lines returns.
    """
    num_records = TextSourceTest.DEFAULT_NUM_RECORDS
    line_counts = [num_records * factor for factor in (5, 3, 12, 8, 8, 4)]
    skip_header_lines = 2
    pattern, written = write_pattern(line_counts)

    expected = self._remove_lines(written, line_counts, skip_header_lines)
    actual = self._read_skip_header_lines(pattern, skip_header_lines)
    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)
Esempio n. 14
0
  def test_read_skip_header_pattern(self):
    """Skip two header lines while reading a pattern built from six counts.

    Every count is a multiple of TextSourceTest.DEFAULT_NUM_RECORDS. The
    expected records come from self._remove_lines applied to the data
    returned by write_pattern (presumably dropping the skipped header lines
    per file -- confirm against the helper). They are compared, ignoring
    order, with what self._read_skip_header_lines returns.
    """
    num_records = TextSourceTest.DEFAULT_NUM_RECORDS
    line_counts = [num_records * factor for factor in (5, 3, 12, 8, 8, 4)]
    skip_header_lines = 2
    pattern, written = write_pattern(line_counts)

    expected = self._remove_lines(written, line_counts, skip_header_lines)
    actual = self._read_skip_header_lines(pattern, skip_header_lines)
    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)