コード例 #1
0
ファイル: streaming.py プロジェクト: adamhadani/dumbo
 def cat(self, path, opts):
     addedopts = getopts(opts, ['libjar'], delete=False)
     streamingjar = findjar(self.hadoop, 'streaming')
     if not streamingjar:
         print >> sys.stderr, 'ERROR: Streaming jar not found'
         return 1
     hadenv = envdef('HADOOP_CLASSPATH', addedopts['libjar'],
                     shortcuts=dict(configopts('jars')))
     try:
         import typedbytes
         ls = os.popen('%s %s/bin/hadoop dfs -ls %s' % (hadenv, self.hadoop, path))
         if sum(c in path for c in ("*", "?", "{")) > 0:
             # cat each file separately when the path contains special chars
             lineparts = (line.split()[-1] for line in ls)
             subpaths = [part for part in lineparts if part.startswith("/")]
         else:
             # we still do the ls even in this case to make sure we print errors 
             subpaths = [path]
         ls.close()
         for subpath in subpaths:
             dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'
                               % (hadenv, self.hadoop, streamingjar, subpath))
             ascodeopt = getopt(opts, 'ascode')
             if ascodeopt and ascodeopt[0] == 'yes':
                 outputs = dumpcode(typedbytes.PairedInput(dumptb))
             else:
                 outputs = dumptext(typedbytes.PairedInput(dumptb))
             for output in outputs:
                 print '\t'.join(output)
             dumptb.close()
     except IOError:
         pass  # ignore
     return 0
コード例 #2
0
 def cat(self, path, opts):
     addedopts = getopts(opts, ['libjar'], delete=False)
     streamingjar = findjar(self.hadoop, 'streaming')
     if not streamingjar:
         print >> sys.stderr, 'ERROR: Streaming jar not found'
         return 1
     hadenv = envdef('HADOOP_CLASSPATH', addedopts['libjar'],
                     shortcuts=dict(configopts('jars')))
     try:
         import typedbytes
         ls = os.popen('%s %s dfs -ls %s' % (hadenv, self.hdfs, path))
         if sum(c in path for c in ("*", "?", "{")) > 0:
             # cat each file separately when the path contains special chars
             lineparts = (line.split()[-1] for line in ls)
             subpaths = [part for part in lineparts if part.startswith("/")]
         else:
             # we still do the ls even in this case to make sure we print errors 
             subpaths = [path]
         ls.close()
         for subpath in subpaths:
             if subpath.endswith("/_logs"):
                 continue
             dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'
                               % (hadenv, self.hadoop, streamingjar, subpath))
             ascodeopt = getopt(opts, 'ascode')
             if ascodeopt and ascodeopt[0] == 'yes':
                 outputs = dumpcode(typedbytes.PairedInput(dumptb))
             else:
                 outputs = dumptext(typedbytes.PairedInput(dumptb))
             for output in outputs:
                 print '\t'.join(output)
             dumptb.close()
     except IOError:
         pass  # ignore
     return 0
コード例 #3
0
ファイル: cmd.py プロジェクト: andrix/dumbo
def encodepipe(opts=None):
    opts = opts or Options()
    keys = ['addpath', 'file', 'alreadycoded']
    addedopts = opts.filter(keys)
    opts.remove(*keys)

    ofiles = addedopts['file']
    files = map(open, ofiles) if ofiles else [sys.stdin]

    loadfun = loadcode if addedopts['alreadycoded'] else loadtext
    addpath = addedopts['addpath']

    for _file in files:
        outputs = loadfun(line[:-1] for line in _file)
        if addpath:
            outputs = (((_file.name, key), value) for (key, value) in outputs)
        for output in dumpcode(outputs):
            print '\t'.join(output)
        _file.close()
    return 0
コード例 #4
0
ファイル: cmd.py プロジェクト: jso/dumbo
def encodepipe(opts=None):
    opts = opts or Options()
    keys = ['addpath', 'file', 'alreadycoded']
    addedopts = opts.filter(keys)
    opts.remove(*keys)

    ofiles = addedopts['file']
    files = map(openFile, ofiles) if ofiles else [sys.stdin]

    loadfun = loadcode if addedopts['alreadycoded'] else loadtext
    addpath = addedopts['addpath']

    for _file in files:
        outputs = loadfun(line[:-1] for line in _file)
        if addpath:
            outputs = (((_file.name, key), value) for (key, value) in outputs)
        for output in dumpcode(outputs):
            print '\t'.join(output)
        _file.close()
    return 0
コード例 #5
0
ファイル: testcoding.py プロジェクト: CyaLiven/dumbo
 def dotest(self,data):
     """Assert that ``data`` survives a dumpcode/loadcode round trip.

     ``data`` is dumped as the value of a pair keyed ``"dummy"``, the dumped
     fields are joined with tabs into one line, that line is loaded back,
     and the loaded value must equal the original.
     """
     pair = ("dummy", data)
     fields = util.dumpcode([pair]).next()
     encoded = "\t".join(fields)
     decoded = util.loadcode([encoded]).next()
     # index 1 is the value half of the decoded (key, value) pair
     self.assertEqual(decoded[1], data)
コード例 #6
0
 def dotest(self, data):
     """Check that dumping and then loading ``data`` gives back an equal value.

     The value is paired with the key ``"dummy"``, encoded by
     ``util.dumpcode`` into fields that are tab-joined into a single line,
     and decoded again by ``util.loadcode``.
     """
     first_dumped = util.dumpcode([("dummy", data)]).next()
     line = "\t".join(first_dumped)
     first_loaded = util.loadcode([line]).next()
     # only the value (second element) of the loaded pair is compared
     restored = first_loaded[1]
     self.assertEqual(restored, data)