def test_pipeline_read_all_file_pattern(self):
    """Check ``ReadAllVcfHeaders`` against a glob that matches three files.

    Three temporary ``.vcf`` files are written, each holding a different
    subset of ``self.lines`` followed by the last entry of ``self.lines``.
    The glob pattern is fed to the pipeline as a single-element PCollection
    and the output is compared with the headers that
    ``_get_vcf_header_from_lines`` builds for each file.
    """
    with temp_dir.TempDir() as tempdir:
        closing_line = self.lines[-1]
        header_groups = [
            [self.lines[1], closing_line],
            [self.lines[2], self.lines[3], closing_line],
            [self.lines[4], closing_line],
        ]
        # One temp file per group, created in order; keep each path next to
        # the lines that were written into it.
        written = []
        for group in header_groups:
            path = tempdir.create_temp_file(suffix='.vcf', lines=group)
            written.append((group, path))

        pipeline = TestPipeline()
        pattern = os.path.join(tempdir.get_path(), '*.vcf')
        patterns = pipeline | 'Create' >> beam.Create([pattern])
        pcoll = patterns | 'ReadHeaders' >> ReadAllVcfHeaders()

        expected = []
        for group, path in written:
            expected.append(_get_vcf_header_from_lines(group, file_name=path))

        assert_that(pcoll, asserts.header_vars_equal(expected))
        pipeline.run()
def test_pipeline_read_all_file_pattern(self):
    """Check ``GetAllEstimates`` against a glob that matches three files.

    Three temporary ``.vcf`` files are written, each made of a slice of
    ``self.headers``, the last entry of ``self.headers`` and a slice of
    ``self.records``. The glob pattern is fed to the pipeline as a
    single-element PCollection and the output is compared with the
    estimates that ``_get_estimate_from_lines`` builds for each file.
    """
    with temp_dir.TempDir() as tempdir:
        lines_1 = self.headers[1:2] + self.headers[-1:] + self.records[:2]
        lines_2 = self.headers[2:4] + self.headers[-1:] + self.records[2:4]
        lines_3 = self.headers[4:5] + self.headers[-1:] + self.records[4:]
        file_name_1 = tempdir.create_temp_file(suffix='.vcf', lines=lines_1)
        file_name_2 = tempdir.create_temp_file(suffix='.vcf', lines=lines_2)
        file_name_3 = tempdir.create_temp_file(suffix='.vcf', lines=lines_3)

        pipeline = TestPipeline()
        # Only the read-all transform is applied: every transform attached
        # to the pipeline is executed by run(), so nothing is added whose
        # output is not asserted on below.
        pcoll = (
            pipeline
            | 'Create' >> beam.Create(
                [os.path.join(tempdir.get_path(), '*.vcf')])
            | 'GetAllEstimates' >> GetAllEstimates()
        )

        expected = [
            _get_estimate_from_lines(lines, file_name=file_name)
            for lines, file_name in [
                (lines_1, file_name_1),
                (lines_2, file_name_2),
                (lines_3, file_name_3),
            ]
        ]
        assert_that(pcoll, asserts.header_vars_equal(expected))
        pipeline.run()
def test_read_file_pattern(self):
    """Check that ``VcfHeaderSource`` reads every file matching a glob.

    Three temporary ``.vcf`` files are written, each holding a different
    subset of ``self.lines`` followed by the last entry of ``self.lines``.
    The source is read directly (no pipeline) and the result is compared
    with the headers that ``_get_vcf_header_from_lines`` builds per file.
    """
    with temp_dir.TempDir() as tempdir:
        closing_line = self.lines[-1]
        header_groups = [
            [self.lines[1], closing_line],
            [self.lines[2], self.lines[3], closing_line],
            [self.lines[4], closing_line],
        ]
        # Files are created in group order; paths[i] belongs to
        # header_groups[i].
        paths = [
            tempdir.create_temp_file(suffix='.vcf', lines=group)
            for group in header_groups
        ]

        pattern = os.path.join(tempdir.get_path(), '*.vcf')
        source = VcfHeaderSource(pattern)
        actual = source_test_utils.read_from_source(source)

        expected = [
            _get_vcf_header_from_lines(group, file_name=path)
            for group, path in zip(header_groups, paths)
        ]
        # header_vars_equal returns a callable that is applied to the
        # values that were actually read.
        matcher = asserts.header_vars_equal(expected)
        matcher(actual)
def test_read_file_pattern(self):
    """Check that ``VcfEstimateSource`` reads every file matching a glob.

    Three temporary ``.vcf`` files are written, each made of a slice of
    ``self.headers``, the last entry of ``self.headers`` and a slice of
    ``self.records``. The source is read directly (no pipeline) and the
    result is compared with the estimates that ``_get_estimate_from_lines``
    builds per file.
    """
    with temp_dir.TempDir() as tempdir:
        final_header = self.headers[-1:]
        file_contents = [
            self.headers[1:2] + final_header + self.records[:2],
            self.headers[2:4] + final_header + self.records[2:4],
            self.headers[4:5] + final_header + self.records[4:],
        ]
        # Files are created in order; each path is kept with the lines
        # that were written into it.
        written = []
        for content in file_contents:
            path = tempdir.create_temp_file(suffix='.vcf', lines=content)
            written.append((content, path))

        pattern = os.path.join(tempdir.get_path(), '*.vcf')
        actual = source_test_utils.read_from_source(VcfEstimateSource(pattern))

        expected = []
        for content, path in written:
            expected.append(_get_estimate_from_lines(content, file_name=path))

        # header_vars_equal returns a callable that is applied to the
        # values that were actually read.
        check = asserts.header_vars_equal(expected)
        check(actual)