コード例 #1
0
    def test_create_from_null(self):
        """Reading a ConcatSource created with None should give no records."""
        with concat_reader.ConcatSource(None).reader() as reader:
            records = [record for record in reader]

        self.assertEqual(0, len(records))
コード例 #2
0
    def test_read_empty_list(self):
        """Reading a ConcatSource over an empty list should give no records."""
        with concat_reader.ConcatSource([]).reader() as reader:
            records = [record for record in reader]

        self.assertEqual(0, len(records))
コード例 #3
0
    def test_concat_source_to_shuffle_sink(self):
        """Check the work item built from the concat-source/shuffle-sink message.

        The parsed work item must carry id 1234 and a map task made of three
        operations: a read from a ConcatSource over two text files, a DoFn,
        and a shuffle write.
        """
        work = workitem.get_work_items(
            get_concat_source_to_shuffle_sink_message())
        self.assertIsNotNone(work)

        # The two expected sub-sources differ only in the file they read.
        input_paths = (
            'gs://sort_g/input_small_files/ascii_sort_1MB_input.0000006',
            'gs://sort_g/input_small_files/ascii_sort_1MB_input.0000007',
        )
        expected_sub_sources = [
            io.TextFileSource(file_path=path,
                              start_offset=0,
                              end_offset=1000000,
                              strip_trailing_newlines=True,
                              coder=CODER)
            for path in input_paths
        ]
        expected_concat_source = concat_reader.ConcatSource(
            expected_sub_sources)

        actual = (work.proto.id, work.map_task.operations)

        read_op = maptask.WorkerRead(expected_concat_source,
                                     output_coders=[CODER])
        do_fn_op = maptask.WorkerDoFn(serialized_fn='code',
                                      output_tags=['out'],
                                      input=(1, 0),
                                      side_inputs=[],
                                      output_coders=[CODER])
        shuffle_write_op = maptask.WorkerShuffleWrite(
            shuffle_kind='group_keys',
            shuffle_writer_config='opaque',
            input=(1, 0),
            output_coders=(CODER, ))

        # Compare id and operations together, as a single tuple.
        self.assertEqual(actual,
                         (1234, [read_op, do_fn_op, shuffle_write_op]))
コード例 #4
0
 def _parse_concat_source(specs, _, unused_context):
   if specs['@type'] == 'ConcatSource':
     assert unused_context.worker_environment is not None
     sub_sources = []
     for sub_source_dict in specs['sources']:
       sub_source_specs = sub_source_dict['spec']
       sub_source_codec_specs = None
       if 'encoding' in sub_source_dict:
         sub_source_codec_specs = sub_source_dict['encoding']
       sub_sources.append(unused_context.worker_environment.parse_source(
           sub_source_specs, sub_source_codec_specs, unused_context))
     return concat_reader.ConcatSource(sub_sources)
コード例 #5
0
    def _create_concat_source(self,
                              sub_source_sizes,
                              output_record,
                              index_of_source_to_fail=-1,
                              index_to_fail_reading=-1,
                              fail_reader_at_close=False):
        """Build a ConcatSource of TestSource objects over generated data.

        Args:
          sub_source_sizes: Passed to self.create_data to produce one data
            chunk per sub-source.
          output_record: List-like object; every generated chunk is appended
            to it with extend(), in order, before any source is built.
          index_of_source_to_fail: Index of the sub-source that receives the
            two settings below. The default of -1 matches no index.
          index_to_fail_reading: Second argument given to that TestSource
            (presumably the record index at which reading fails; confirm
            against TestSource).
          fail_reader_at_close: Third argument given to that TestSource.

        Returns:
          A concat_reader.ConcatSource wrapping one TestSource per chunk.
          Every sub-source other than the selected one is built with
          (-1, False).
        """
        chunks = self.create_data(sub_source_sizes)

        # First pass: record everything the sources are expected to emit.
        for chunk in chunks:
            output_record.extend(chunk)

        # Second pass: wrap each chunk in a TestSource.
        sources = [
            TestSource(chunk, index_to_fail_reading, fail_reader_at_close)
            if position == index_of_source_to_fail
            else TestSource(chunk, -1, False)
            for position, chunk in enumerate(chunks)
        ]
        return concat_reader.ConcatSource(sources)