Esempio n. 1
0
    def test_pipeline_read_all_file_pattern(self):
        """Run ReadAllVcfHeaders over a '*.vcf' glob of three temp files.

        Three temporary files are written from subsets of ``self.lines``
        and the pipeline output is compared against the headers built by
        ``_get_vcf_header_from_lines`` for each (lines, file name) pair.
        """
        with temp_dir.TempDir() as tempdir:
            # Every fixture file ends with the final entry of self.lines.
            last_line = self.lines[-1]
            header_groups = [
                [self.lines[1], last_line],
                [self.lines[2], self.lines[3], last_line],
                [self.lines[4], last_line],
            ]
            file_names = [
                tempdir.create_temp_file(suffix='.vcf', lines=group)
                for group in header_groups
            ]
            file_pattern = os.path.join(tempdir.get_path(), '*.vcf')

            pipeline = TestPipeline()
            patterns = pipeline | 'Create' >> beam.Create([file_pattern])
            pcoll = patterns | 'ReadHeaders' >> ReadAllVcfHeaders()

            expected = [
                _get_vcf_header_from_lines(group, file_name=name)
                for group, name in zip(header_groups, file_names)
            ]
            assert_that(pcoll, asserts.header_vars_equal(expected))
            pipeline.run()
Esempio n. 2
0
    def test_pipeline_read_all_file_pattern(self):
        """Run GetAllEstimates over a '*.vcf' glob of three temp files.

        Three temporary files are written, each made of a slice of
        ``self.headers``, the final header entry and a slice of
        ``self.records``. The pipeline output is compared against the
        values built by ``_get_estimate_from_lines`` for each
        (lines, file name) pair.
        """
        with temp_dir.TempDir() as tempdir:
            lines_1 = self.headers[1:2] + self.headers[-1:] + self.records[:2]
            lines_2 = self.headers[2:4] + self.headers[-1:] + self.records[2:4]
            lines_3 = self.headers[4:5] + self.headers[-1:] + self.records[4:]
            file_name_1 = tempdir.create_temp_file(suffix='.vcf',
                                                   lines=lines_1)
            file_name_2 = tempdir.create_temp_file(suffix='.vcf',
                                                   lines=lines_2)
            file_name_3 = tempdir.create_temp_file(suffix='.vcf',
                                                   lines=lines_3)

            pipeline = TestPipeline()
            # Only the "read all" transform is exercised here: the glob is
            # fed in as a PCollection element rather than as a constructor
            # argument.
            pcoll = (pipeline
                     | 'Create' >> beam.Create(
                         [os.path.join(tempdir.get_path(), '*.vcf')])
                     | 'GetAllEstimates' >> GetAllEstimates())

            expected = [
                _get_estimate_from_lines(lines, file_name=file_name)
                for lines, file_name in [(lines_1, file_name_1),
                                         (lines_2, file_name_2),
                                         (lines_3, file_name_3)]
            ]
            assert_that(pcoll, asserts.header_vars_equal(expected))
            pipeline.run()
Esempio n. 3
0
  def test_read_file_pattern(self):
    """Read three temp files through VcfHeaderSource using a '*.vcf' glob.

    The result of ``source_test_utils.read_from_source`` is checked against
    the headers built by ``_get_vcf_header_from_lines`` for each
    (lines, file name) pair.
    """
    with temp_dir.TempDir() as tempdir:
      # Every fixture file ends with the final entry of self.lines.
      last_line = self.lines[-1]
      header_groups = [
          [self.lines[1], last_line],
          [self.lines[2], self.lines[3], last_line],
          [self.lines[4], last_line],
      ]
      file_names = [tempdir.create_temp_file(suffix='.vcf', lines=group)
                    for group in header_groups]

      source = VcfHeaderSource(os.path.join(tempdir.get_path(), '*.vcf'))
      actual = source_test_utils.read_from_source(source)

      expected = [_get_vcf_header_from_lines(group, file_name=name)
                  for group, name in zip(header_groups, file_names)]

      matcher = asserts.header_vars_equal(expected)
      matcher(actual)
Esempio n. 4
0
    def test_read_file_pattern(self):
        """Read three temp files through VcfEstimateSource via a glob.

        Each file is made of a slice of ``self.headers``, the final header
        entry and a slice of ``self.records``. The result of
        ``source_test_utils.read_from_source`` is checked against the values
        built by ``_get_estimate_from_lines`` for each (lines, file name)
        pair.
        """
        with temp_dir.TempDir() as tempdir:
            trailing_header = self.headers[-1:]
            contents = [
                self.headers[1:2] + trailing_header + self.records[:2],
                self.headers[2:4] + trailing_header + self.records[2:4],
                self.headers[4:5] + trailing_header + self.records[4:],
            ]
            paths = [
                tempdir.create_temp_file(suffix='.vcf', lines=file_lines)
                for file_lines in contents
            ]

            source = VcfEstimateSource(
                os.path.join(tempdir.get_path(), '*.vcf'))
            actual = source_test_utils.read_from_source(source)

            expected = [
                _get_estimate_from_lines(file_lines, file_name=path)
                for file_lines, path in zip(contents, paths)
            ]

            matcher = asserts.header_vars_equal(expected)
            matcher(actual)