Example #1
 def test_basics(self):
     v = ('a' * 10, 'b' * 90)
     pickler = coders.PickleCoder()
     self.assertEquals(v, pickler.decode(pickler.encode(v)))
     pickler = coders.Base64PickleCoder()
     self.assertEquals(v, pickler.decode(pickler.encode(v)))
     self.assertEquals(coders.Base64PickleCoder().encode(v),
                       base64.b64encode(coders.PickleCoder().encode(v)))
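A minimal round-trip sketch of the property the test above asserts. It assumes `coders` is the same module imported by these tests (the import path below is an assumption and depends on the SDK version); the sample value is illustrative.

    import base64

    # Import path is an assumption; adjust to your SDK layout.
    from google.cloud.dataflow import coders

    value = {'key': [1, 2, 3]}  # any picklable object works

    pickled = coders.PickleCoder().encode(value)        # raw pickle bytes
    encoded = coders.Base64PickleCoder().encode(value)  # base64 text, safe for text files

    # Base64PickleCoder is PickleCoder followed by base64, as the test checks.
    assert encoded == base64.b64encode(pickled)
    assert coders.Base64PickleCoder().decode(encoded) == value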
Example #2
    def test_create_do_avro_write(self):
        output_path = self.create_temp_file('n/a')
        elements = ['abc', 'def', 'ghi']
        work_item = workitem.BatchWorkItem(None)

        work_item.map_task = make_map_task([
            maptask.WorkerRead(
                inmemory.InMemorySource(
                    elements=[pickler.dumps(e) for e in elements],
                    start_index=2,  # Start at the last element.
                    end_index=3),
                output_coders=[self.OUTPUT_CODER]),
            maptask.WorkerDoFn(serialized_fn=pickle_with_side_inputs(
                ptransform.CallableWrapperDoFn(lambda x: ['XYZ: %s' % x])),
                               output_tags=['out'],
                               input=(0, 0),
                               side_inputs=None,
                               output_coders=[self.OUTPUT_CODER]),
            make_text_sink(output_path,
                           input=(1, 0),
                           coder=coders.Base64PickleCoder())
        ])

        executor.MapTaskExecutor(work_item.map_task).execute()
        with open(output_path) as f:
            self.assertEqual('XYZ: ghi', pickler.loads(f.read().strip()))
Example #3
 def _parse_avro_sink(specs, unused_codec_specs, unused_context):
     # Note that the worker does not really implement AVRO yet. It takes
     # advantage of the fact that both reading and writing go through the
     # worker to choose a supported format (text files with one pickled
     # object per line).
     if specs['@type'] == 'AvroSink':
         return io.TextFileSink(specs['filename']['value'],
                                append_trailing_newlines=True,
                                coder=coders.Base64PickleCoder())
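The comment above describes the stand-in format: a plain text file with one base64-encoded pickle per line. A stdlib-only sketch of that format follows; the helper names and file path are hypothetical, for illustration only.

    import base64
    import pickle

    def write_pickled_lines(path, elements):
        # One base64-encoded pickle per line, mirroring what the text sink
        # produces when configured with Base64PickleCoder.
        with open(path, 'w') as f:
            for element in elements:
                f.write(base64.b64encode(pickle.dumps(element)).decode('ascii') + '\n')

    def read_pickled_lines(path):
        # Reverse of the above: each stripped line decodes back to the original object.
        with open(path) as f:
            return [pickle.loads(base64.b64decode(line.strip())) for line in f]

    write_pickled_lines('demo.txt', ['abc', {'k': 1}])
    assert read_pickled_lines('demo.txt') == ['abc', {'k': 1}]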
Example #4
 def test_create_do_with_side_avro_file_write(self):
     input_path1 = self.create_temp_file('%s\n' % pickler.dumps('x'))
     input_path2 = self.create_temp_file('%s\n' % pickler.dumps('y'))
     elements = ['aa', 'bb']
     output_buffer = []
     executor.MapTaskExecutor().execute(
         make_map_task([
             maptask.WorkerRead(inmemory.InMemorySource(
                 elements=[pickler.dumps(e) for e in elements],
                 start_index=0,
                 end_index=2),
                                output_coders=[self.OUTPUT_CODER]),
             maptask.WorkerDoFn(
                 serialized_fn=pickle_with_side_inputs(
                     ptransform.CallableWrapperDoFn(
                         lambda x, side: ['%s:%s' % (x, s) for s in side]),
                     tag_and_type=('sometag',
                                   pvalue.IterablePCollectionView, ())),
                 output_tags=['out'],
                 input=(0, 0),
                 # Note that the two side inputs have the same tag. This is quite
                 # common for intermediary PCollections used as side inputs that
                 # are saved as AVRO files. The files will contain the sharded
                 # PCollection.
                 side_inputs=[
                     maptask.WorkerSideInputSource(fileio.TextFileSource(
                         file_path=input_path1,
                         coder=coders.Base64PickleCoder()),
                                                   tag='sometag'),
                     maptask.WorkerSideInputSource(fileio.TextFileSource(
                         file_path=input_path2,
                         coder=coders.Base64PickleCoder()),
                                                   tag='sometag')
                 ],
                 output_coders=[self.OUTPUT_CODER]),
             maptask.WorkerInMemoryWrite(
                 output_buffer=output_buffer,
                 input=(1, 0),
                 output_coders=(self.OUTPUT_CODER, ))
         ]))
     # The side source was specified as a collection, therefore we should see
     # every main input element paired with each element of the side source.
     self.assertEqual([u'aa:x', u'aa:y', u'bb:x', u'bb:y'],
                      sorted(output_buffer))
Example #5
 def _parse_avro_source(specs, unused_codec_specs, unused_context):
     if specs['@type'] == 'AvroSource':
         # Note that the worker does not really implement AVRO yet. It takes
         # advantage of the fact that both reading and writing go through the
         # worker to choose a supported format (text files with one pickled
         # object per line).
         start_offset = None
         if 'start_offset' in specs:
             start_offset = int(specs['start_offset']['value'])
         end_offset = None
         if 'end_offset' in specs:
             end_offset = int(specs['end_offset']['value'])
         return io.TextFileSource(file_path=specs['filename']['value'],
                                  start_offset=start_offset,
                                  end_offset=end_offset,
                                  strip_trailing_newlines=True,
                                  coder=coders.Base64PickleCoder())
Example #6
    def __init__(self,
                 elements,
                 coder=coders.Base64PickleCoder(),
                 start_index=None,
                 end_index=None):
        self.elements = elements
        self.coder = coder

        if start_index is None:
            self.start_index = 0
        else:
            self.start_index = start_index

        if end_index is None:
            self.end_index = len(elements)
        else:
            self.end_index = end_index
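A brief usage sketch of this constructor, assuming `inmemory` and `pickler` are the same modules used in the earlier examples; with no indices given, the source covers the whole element list.

    elements = ['aa', 'bb', 'cc']

    # Elements are supplied already encoded, in the form the coder expects;
    # the default Base64PickleCoder decodes them when the source is read.
    source = inmemory.InMemorySource(
        elements=[pickler.dumps(e) for e in elements])

    assert source.start_index == 0             # no start_index: first element
    assert source.end_index == len(elements)   # no end_index: one past the last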
Example #7
 def test_equality(self):
     self.assertEquals(coders.PickleCoder(), coders.PickleCoder())
     self.assertEquals(coders.Base64PickleCoder(),
                       coders.Base64PickleCoder())
     self.assertNotEquals(coders.Base64PickleCoder(), coders.PickleCoder())
     self.assertNotEquals(coders.Base64PickleCoder(), object())