예제 #1
0
 def testpairio(self):
     """Round-trip (index, object) pairs through PairedOutput/PairedInput.

     Writes ``enumerate(TestIO.objects)`` to the scratch file ``test.bin``,
     reads the pairs back and asserts that each record equals the object
     stored at its index.
     """
     objects = TestIO.objects
     try:
         # ``with`` closes each handle even if writing or an assertion fails.
         with open("test.bin", "wb") as stream:
             typedbytes.PairedOutput(stream).writes(enumerate(objects))
         with open("test.bin", "rb") as stream:
             for index, record in typedbytes.PairedInput(stream).reads():
                 self.assertEqual(objects[index], record)
     finally:
         # Remove the scratch file on every exit path; the existence check
         # keeps a failed open() from being masked by an OSError here.
         if os.path.exists("test.bin"):
             os.remove("test.bin")
예제 #2
0
 def testwrongio(self):
     """Check that unpaired data cannot be read back as pairs.

     A single bare value is written with the plain ``Output`` writer; reading
     that file through ``PairedInput`` is asserted to raise ``StructError``.
     """
     try:
         # ``with`` closes each handle even if writing or the assertion fails.
         with open("test.bin", "wb") as stream:
             typedbytes.Output(stream).writes([1])
         with open("test.bin", "rb") as stream:
             reader = typedbytes.PairedInput(stream)
             # list() drains reads() inside the callable, so an error raised
             # while iterating is observed by assertRaises.
             self.assertRaises(StructError, lambda: list(reader.reads()))
     finally:
         # The existence check keeps a failed open() from being masked by an
         # OSError raised during cleanup.
         if os.path.exists("test.bin"):
             os.remove("test.bin")
예제 #3
0
def run(mapper,
        reducer=None,
        combiner=None,
        buffersize=None,
        mapconf=None,
        redconf=None,
        combconf=None,
        mapclose=None,
        redclose=None,
        combclose=None,
        opts=None,
        input=None,
        output=None,
        iter=0):
    if len(sys.argv) > 1 and not sys.argv[1][0] == '-':
        iterarg = 0  # default value
        if len(sys.argv) > 2:
            iterarg = int(sys.argv[2])
        memlim = None  # memory limit
        if len(sys.argv) > 3:
            memlim = int(sys.argv[3])
            resource.setrlimit(resource.RLIMIT_AS, (memlim, memlim))

        mrbase_class = loadclassname(os.environ['dumbo_mrbase_class'])
        jk_class = loadclassname(os.environ['dumbo_jk_class'])
        runinfo = loadclassname(os.environ['dumbo_runinfo_class'])()

        if iterarg == iter:
            if sys.argv[1].startswith('map'):
                if type(mapper) in (types.ClassType, type):
                    mappercls = type('DumboMapper', (mapper, mrbase_class), {})
                    mapper = mappercls()
                if hasattr(mapper, 'configure'):
                    mapconf = mapper.configure
                if hasattr(mapper, 'close'):
                    mapclose = mapper.close
                if hasattr(mapper, 'map'):
                    mapper = mapper.map
                if type(combiner) in (types.ClassType, type):
                    combinercls = type('DumboCombiner',
                                       (combiner, mrbase_class), {})
                    combiner = combinercls()
                if hasattr(combiner, 'configure'):
                    combconf = combiner.configure
                if hasattr(combiner, 'close'):
                    combclose = combiner.close
                if hasattr(combiner, 'reduce'):
                    combiner = combiner.reduce
                try:
                    print >> sys.stderr, "INFO: consuming %s" % \
                                         os.environ['map_input_file']
                except KeyError:
                    pass
                if os.environ.has_key('stream_map_input') and \
                os.environ['stream_map_input'].lower() == 'typedbytes':
                    print >> sys.stderr, "INFO: inputting typed bytes"
                    try:
                        import ctypedbytes as typedbytes
                    except ImportError:
                        import typedbytes
                    inputs = typedbytes.PairedInput(sys.stdin).reads()
                else:
                    inputs = loadcode(line[:-1] for line in sys.stdin)
                if mapconf:
                    mapconf()
                if combconf:
                    combconf()
                if os.environ.has_key('dumbo_addpath'):
                    path = runinfo.get_input_path()
                    inputs = (((path, k), v) for (k, v) in inputs)
                if os.environ.has_key('dumbo_joinkeys'):
                    inputs = ((jk_class(k), v) for (k, v) in inputs)
                if os.environ.has_key('dumbo_parser'):
                    parser = os.environ['dumbo_parser']
                    clsname = parser.split('.')[-1]
                    modname = '.'.join(parser.split('.')[:-1])
                    if not modname:
                        raise ImportError(parser)
                    module = __import__(modname, fromlist=[clsname])
                    parse = getattr(module, clsname)().parse
                    outputs = itermap(inputs, mapper, parse)
                elif os.environ.has_key('dumbo_record'):
                    record = os.environ['dumbo_record']
                    clsname = record.split('.')[-1]
                    modname = '.'.join(record.split('.')[:-1])
                    if not modname:
                        raise ImportError(parser)
                    module = __import__(modname, fromlist=[clsname])
                    set = getattr(module, clsname)().set
                    outputs = itermap(inputs, mapper, lambda v: set(*v))
                else:
                    outputs = itermap(inputs, mapper)
                if combiner and type(combiner) != str:
                    if (not buffersize) and memlim:
                        buffersize = int(memlim * 0.33) / 512  # educated guess
                        print >> sys.stderr, 'INFO: buffersize =', buffersize
                    inputs = sorted(outputs, buffersize)
                    if os.environ.has_key('dumbo_joinkeys'):
                        outputs = iterreduce(inputs,
                                             combiner,
                                             keyfunc=jk_class.fromjoinkey)
                    else:
                        outputs = iterreduce(inputs, combiner)
                if os.environ.has_key('dumbo_joinkeys'):
                    outputs = ((jk.dump(), v) for (jk, v) in outputs)
                if os.environ.has_key('stream_map_output') and \
                os.environ['stream_map_output'].lower() == 'typedbytes':
                    print >> sys.stderr, "INFO: outputting typed bytes"
                    try:
                        import ctypedbytes as typedbytes
                    except ImportError:
                        import typedbytes
                    typedbytes.PairedOutput(sys.stdout).writes(outputs)
                else:
                    for output in dumpcode(outputs):
                        print '\t'.join(output)
                if combclose:
                    combclose()
                if mapclose:
                    mapclose()
            elif reducer:
                if type(reducer) in (types.ClassType, type):
                    reducercls = type('DumboReducer', (reducer, mrbase_class),
                                      {})
                    reducer = reducercls()
                if hasattr(reducer, 'configure'):
                    redconf = reducer.configure
                if hasattr(reducer, 'close'):
                    redclose = reducer.close
                if hasattr(reducer, 'reduce'):
                    reducer = reducer.reduce
                if os.environ.has_key('stream_reduce_input') and \
                os.environ['stream_reduce_input'].lower() == 'typedbytes':
                    print >> sys.stderr, "INFO: inputting typed bytes"
                    try:
                        import ctypedbytes as typedbytes
                    except ImportError:
                        import typedbytes
                    inputs = typedbytes.PairedInput(sys.stdin).reads()
                else:
                    inputs = loadcode(line[:-1] for line in sys.stdin)
                if redconf:
                    redconf()
                if os.environ.has_key('dumbo_joinkeys'):
                    outputs = iterreduce(inputs,
                                         reducer,
                                         keyfunc=jk_class.fromdump)
                    outputs = ((jk.body, v) for (jk, v) in outputs)
                else:
                    outputs = iterreduce(inputs, reducer)
                if os.environ.has_key('stream_reduce_output') and \
                os.environ['stream_reduce_output'].lower() == 'typedbytes':
                    print >> sys.stderr, "INFO: outputting typed bytes"
                    try:
                        import ctypedbytes as typedbytes
                    except ImportError:
                        import typedbytes
                    typedbytes.PairedOutput(sys.stdout).writes(outputs)
                else:
                    for output in dumpcode(outputs):
                        print '\t'.join(output)
                if redclose:
                    redclose()
            else:
                for output in dumpcode(inputs):
                    print '\t'.join(output)
    else:
        opts = Options(opts)
        if type(mapper) == str:
            opts.add('mapper', mapper)
        elif hasattr(mapper, 'opts'):
            opts += mapper.opts
        if type(reducer) == str:
            opts.add('reducer', reducer)
        elif hasattr(reducer, 'opts'):
            opts += reducer.opts
        if type(combiner) == str:
            opts.add('combiner', combiner)
        opts += parseargs(sys.argv[1:])

        if input is not None:
            opts.remove('input')
            for infile in input:
                opts.add('input', infile)

        if output is None:
            outputopt = opts['output']
            if not outputopt:
                print >> sys.stderr, 'ERROR: No output path specified'
                sys.exit(1)
            output = outputopt[0]

        newopts = Options()
        newopts.add('output', output)
        if not reducer:
            newopts.add('numreducetasks', '0')

        keys = [k for k, _ in opts if k in newopts]
        opts.remove(*keys)
        opts += newopts

        backend = get_backend(opts)

        overwriteopt = opts.pop('overwrite')
        checkoutput = 'no' not in opts.pop('checkoutput')
        fs = backend.create_filesystem(opts)
        if 'yes' in overwriteopt:
            fs.rm(output, opts)
        elif checkoutput and fs.exists(output, opts) == 0:
            print >> sys.stderr, 'ERROR: Output path exists already: %s' % output
            sys.exit(1)

        opts.add('cmdenv', 'dumbo_mrbase_class=' + \
                     getclassname(backend.get_mapredbase_class(opts)))
        opts.add('cmdenv', 'dumbo_jk_class=' + \
                     getclassname(backend.get_joinkey_class(opts)))
        opts.add('cmdenv', 'dumbo_runinfo_class=' + \
                     getclassname(backend.get_runinfo_class(opts)))
        retval = backend.create_iteration(opts).run()
        if retval == 127:
            print >> sys.stderr, 'ERROR: Are you sure that "python" is on your path?'
        if retval != 0:
            sys.exit(retval)
예제 #4
0
import ctypedbytes as typedbytes
import sys

# Feed everything PairedInput decodes from stdin straight into PairedOutput
# on stdout.
reader = typedbytes.PairedInput(sys.stdin)
writer = typedbytes.PairedOutput(sys.stdout)
writer.writes(reader)