コード例 #1
0
ファイル: job.py プロジェクト: mtai/mrjob
 def _mapper_output_protocol(self, step_num, step_map):
     """Return the protocol that the mapper of step *step_num* writes with.

     *step_map* is looked up by the key ``self._step_key(step_num,
     'mapper')``. When that key is absent the mapper is not a script
     substep and a ``RawValueProtocol`` instance is returned. Otherwise
     the mapped value is compared with the last index of *step_map*:
     at or beyond it the job's output protocol is returned, before it
     the internal protocol is returned.
     """
     mapper_key = self._step_key(step_num, 'mapper')

     if mapper_key not in step_map:
         # mapper is not a script substep, so protocols don't apply at all
         return RawValueProtocol()

     writes_final_output = step_map[mapper_key] >= (len(step_map) - 1)
     if writes_final_output:
         return self.output_protocol()
     return self.internal_protocol()
コード例 #2
0
    def _pick_protocol_instances(self, step_num, step_type):
        """Return the ``(read, write)`` protocol pair for one substep.

        *step_num* and *step_type* identify the substep through
        ``self._step_key``. The result depends on where that key sits in
        the mapping built by ``self._script_step_mapping``:

        * combiners get the mapper's output protocol for both reading
          and writing;
        * a key missing from the mapping (a non-script step) yields one
          shared ``RawValueProtocol`` instance for both slots, unless
          ``self.options.strict_protocols`` is set;
        * otherwise the input protocol is used for reading at index 0,
          the output protocol for writing at the last index, and the
          internal protocol everywhere else.

        Raises:
            ValueError: for a non-script step when
                ``self.options.strict_protocols`` is truthy.
        """
        step_map = self._script_step_mapping(self._steps_desc())

        if step_type == 'combiner':
            # Combiners may run anywhere from zero to many times, so they
            # must not change the data format: they both read and write
            # the mapper's output protocol. For non-script substeps that
            # helper simply hands back RawValueProtocol().
            mapper_protocol = self._mapper_output_protocol(
                step_num, step_map)
            return mapper_protocol, mapper_protocol

        step_key = self._step_key(step_num, step_type)

        if step_key not in step_map:
            # Unlikely in practice: we have no idea what a jar or command
            # does with the data, so fall back to raw values -- unless
            # strict protocols were requested, in which case refuse.
            if self.options.strict_protocols:
                raise ValueError(
                    "Can't pick a protocol for a non-script step")
            raw = RawValueProtocol()
            return raw, raw

        script_index = step_map[step_key]
        is_last_script_step = script_index == (len(step_map) - 1)
        is_first_script_step = script_index == 0

        # The write protocol is built before the read protocol.
        write = (self.output_protocol() if is_last_script_step
                 else self.internal_protocol())
        read = (self.input_protocol() if is_first_script_step
                else self.internal_protocol())
        return read, write
コード例 #3
0
 def test_no_strip(self):
     """Reading keeps trailing tab, space and newlines in the value."""
     raw_line = 'foo\t \n\n'
     expected = (None, raw_line)
     self.assertEqual(RawValueProtocol.read(raw_line), expected)
コード例 #4
0
 def test_reads_raw_line(self):
     """Reading a line yields a ``None`` key and the line as the value."""
     raw_line = 'foobar'
     decoded = RawValueProtocol.read(raw_line)
     self.assertEqual(decoded, (None, raw_line))
コード例 #5
0
 def test_dumps_keys(self):
     """Writing a key/value pair emits only the value."""
     encoded = RawValueProtocol.write('foo', 'bar')
     self.assertEqual(encoded, 'bar')
コード例 #6
0
 def output_protocol(self):
     """Return a new ``RawValueProtocol`` instance as the output protocol."""
     return RawValueProtocol()
コード例 #7
0
 def input_protocol(self):
     """Choose the input protocol from ``self.options.job_to_run``.

     Returns a ``JSONProtocol`` instance when the job to run is
     ``'stats'`` and a ``RawValueProtocol`` instance for any other job.
     The choice is logged at debug level.
     """
     if self.options.job_to_run == 'stats':
         LOG.debug('Reading JSON input from count job')
         return JSONProtocol()

     LOG.debug('Reading text input from cdx files')
     return RawValueProtocol()
コード例 #8
0
 def test_no_strip(self):
     """Reading keeps trailing tab, space and newlines in the value."""
     raw_line = 'foo\t \n\n'
     expected = (None, raw_line)
     self.assertEqual(RawValueProtocol.read(raw_line), expected)
コード例 #9
0
 def test_reads_raw_line(self):
     """Reading a line yields a ``None`` key and the line as the value."""
     raw_line = 'foobar'
     decoded = RawValueProtocol.read(raw_line)
     self.assertEqual(decoded, (None, raw_line))
コード例 #10
0
 def test_dumps_keys(self):
     """Writing a key/value pair emits only the value."""
     encoded = RawValueProtocol.write('foo', 'bar')
     self.assertEqual(encoded, 'bar')
コード例 #11
0
ファイル: test_protocol.py プロジェクト: zwd199032/mrjob
 def test_bytestrings(self):
     """Round-trip a value containing a hex escape and a backslash."""
     # The backslash before "c" is written doubled: "\c" is not a
     # recognised escape sequence, so the single-backslash spelling only
     # worked by accident and triggers an invalid-escape warning on
     # current Python versions. The resulting string value is unchanged.
     self.assertRoundTripOK(RawValueProtocol(), None, '\xe90\\c1a')
コード例 #12
0
 def test_dumps_keys(self):
     """Writing a bytes key/value pair emits only the value bytes."""
     protocol = RawValueProtocol()
     encoded = protocol.write(b'foo', b'bar')
     self.assertEqual(encoded, b'bar')