def test_basics(self):
    """Check that both pickle coders round-trip a value and agree on encoding.

    Verifies that:
      * PickleCoder decodes its own encoding back to the original value.
      * Base64PickleCoder decodes its own encoding back to the original value.
      * Base64PickleCoder's encoding equals the base64 encoding of
        PickleCoder's encoding of the same value.
    """
    value = ('a' * 10, 'b' * 90)

    pickle_coder = coders.PickleCoder()
    self.assertEqual(value, pickle_coder.decode(pickle_coder.encode(value)))

    base64_coder = coders.Base64PickleCoder()
    self.assertEqual(value, base64_coder.decode(base64_coder.encode(value)))

    # assertEqual is used instead of the deprecated assertEquals alias, which
    # no longer exists in Python 3.12+.
    self.assertEqual(
        coders.Base64PickleCoder().encode(value),
        base64.b64encode(coders.PickleCoder().encode(value)))
def test_create_do_avro_write(self):
    """Run a read -> DoFn -> text-sink map task and check the written output.

    The in-memory source is restricted to the index range [2, 3), so only the
    last element ('ghi') flows through the DoFn, which prefixes it with
    'XYZ: '. The sink writes through Base64PickleCoder, and the test unpickles
    the stripped file content to compare against the expected string.
    """
    output_path = self.create_temp_file('n/a')
    elements = ['abc', 'def', 'ghi']
    work_item = workitem.BatchWorkItem(None)

    # Source operation: only the last element is inside the index range.
    read_op = maptask.WorkerRead(
        inmemory.InMemorySource(
            elements=[pickler.dumps(element) for element in elements],
            start_index=2,
            end_index=3),
        output_coders=[self.OUTPUT_CODER])

    # DoFn operation: consumes output 0 of operation 0.
    do_op = maptask.WorkerDoFn(
        serialized_fn=pickle_with_side_inputs(
            ptransform.CallableWrapperDoFn(lambda x: ['XYZ: %s' % x])),
        output_tags=['out'],
        input=(0, 0),
        side_inputs=None,
        output_coders=[self.OUTPUT_CODER])

    # Sink operation: consumes output 0 of operation 1.
    write_op = make_text_sink(
        output_path, input=(1, 0), coder=coders.Base64PickleCoder())

    work_item.map_task = make_map_task([read_op, do_op, write_op])
    executor.MapTaskExecutor(work_item.map_task).execute()

    with open(output_path) as sink_file:
        written = sink_file.read().strip()
        self.assertEqual('XYZ: ghi', pickler.loads(written))
def _parse_avro_sink(specs, unused_codec_specs, unused_context): # Note that the worker does not really implement AVRO yet.It takes # advantage that both reading and writing is done through the worker to # choose a supported format (text files with one pickled object per line). if specs['@type'] == 'AvroSink': return io.TextFileSink(specs['filename']['value'], append_trailing_newlines=True, coder=coders.Base64PickleCoder())
def test_create_do_with_side_avro_file_write(self):
    """Run a DoFn whose side input is read from two files sharing one tag.

    Each main-input element ('aa', 'bb') is combined with every element of
    the side input. The side input is supplied as two text files (holding
    pickled 'x' and 'y') registered under the same tag, so the expected
    output is the four 'main:side' combinations.
    """
    side_path_x = self.create_temp_file('%s\n' % pickler.dumps('x'))
    side_path_y = self.create_temp_file('%s\n' % pickler.dumps('y'))
    elements = ['aa', 'bb']
    output_buffer = []

    map_executor = executor.MapTaskExecutor()

    read_op = maptask.WorkerRead(
        inmemory.InMemorySource(
            elements=[pickler.dumps(element) for element in elements],
            start_index=0,
            end_index=2),
        output_coders=[self.OUTPUT_CODER])

    serialized_fn = pickle_with_side_inputs(
        ptransform.CallableWrapperDoFn(
            lambda x, side: ['%s:%s' % (x, s) for s in side]),
        tag_and_type=('sometag', pvalue.IterablePCollectionView, ()))

    # Note that the two side inputs have the same tag. This is quite common
    # for intermediary PCollections used as side inputs that are saved as
    # AVRO files. The files will contain the sharded PCollection.
    side_sources = [
        maptask.WorkerSideInputSource(
            fileio.TextFileSource(
                file_path=side_path_x,
                coder=coders.Base64PickleCoder()),
            tag='sometag'),
        maptask.WorkerSideInputSource(
            fileio.TextFileSource(
                file_path=side_path_y,
                coder=coders.Base64PickleCoder()),
            tag='sometag'),
    ]

    do_op = maptask.WorkerDoFn(
        serialized_fn=serialized_fn,
        output_tags=['out'],
        input=(0, 0),
        side_inputs=side_sources,
        output_coders=[self.OUTPUT_CODER])

    write_op = maptask.WorkerInMemoryWrite(
        output_buffer=output_buffer,
        input=(1, 0),
        output_coders=(self.OUTPUT_CODER,))

    map_executor.execute(make_map_task([read_op, do_op, write_op]))

    # The side source was specified as a collection, so every main element
    # should be paired with each element read from both side-input files.
    expected = [u'aa:x', u'aa:y', u'bb:x', u'bb:y']
    self.assertEqual(expected, sorted(output_buffer))
def _parse_avro_source(specs, unused_codec_specs, unused_context): if specs['@type'] == 'AvroSource': # Note that the worker does not really implement AVRO yet.It takes # advantage that both reading and writing is done through the worker to # choose a supported format (text files with one pickled object per line). start_offset = None if 'start_offset' in specs: start_offset = int(specs['start_offset']['value']) end_offset = None if 'end_offset' in specs: end_offset = int(specs['end_offset']['value']) return io.TextFileSource(file_path=specs['filename']['value'], start_offset=start_offset, end_offset=end_offset, strip_trailing_newlines=True, coder=coders.Base64PickleCoder())
def __init__(self, elements, coder=coders.Base64PickleCoder(),
             start_index=None, end_index=None):
    """Store the elements, the coder, and the index range.

    Args:
      elements: The collection of elements; its length is the default for
        end_index.
      coder: Coder stored on the instance. NOTE(review): the default
        instance is created once, when this function is defined, and is
        shared by every caller that omits the argument.
      start_index: First index of the range; defaults to 0 when None.
      end_index: End index of the range; defaults to len(elements) when
        None.
    """
    self.elements = elements
    self.coder = coder
    self.start_index = 0 if start_index is None else start_index
    self.end_index = len(elements) if end_index is None else end_index
def test_equality(self):
    """Check equality semantics of the pickle coders.

    Two instances of the same coder class compare equal, while a
    Base64PickleCoder is unequal both to a PickleCoder and to an arbitrary
    unrelated object.
    """
    # assertEqual/assertNotEqual are used instead of the deprecated
    # assertEquals/assertNotEquals aliases, which no longer exist in
    # Python 3.12+.
    self.assertEqual(coders.PickleCoder(), coders.PickleCoder())
    self.assertEqual(coders.Base64PickleCoder(), coders.Base64PickleCoder())
    self.assertNotEqual(coders.Base64PickleCoder(), coders.PickleCoder())
    self.assertNotEqual(coders.Base64PickleCoder(), object())