Beispiel #1
0
 def cat(self, path, opts):
     addedopts = getopts(opts, ['libjar'], delete=False)
     streamingjar = findjar(self.hadoop, 'streaming')
     if not streamingjar:
         print >> sys.stderr, 'ERROR: Streaming jar not found'
         return 1
     hadenv = envdef('HADOOP_CLASSPATH', addedopts['libjar'],
                     shortcuts=dict(configopts('jars')))
     try:
         import typedbytes
         ls = os.popen('%s %s/bin/hadoop dfs -ls %s' % (hadenv, self.hadoop, path))
         if sum(c in path for c in ("*", "?", "{")) > 0:
             # cat each file separately when the path contains special chars
             lineparts = (line.split()[-1] for line in ls)
             subpaths = [part for part in lineparts if part.startswith("/")]
         else:
             # we still do the ls even in this case to make sure we print errors 
             subpaths = [path]
         ls.close()
         for subpath in subpaths:
             dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'
                               % (hadenv, self.hadoop, streamingjar, subpath))
             ascodeopt = getopt(opts, 'ascode')
             if ascodeopt and ascodeopt[0] == 'yes':
                 outputs = dumpcode(typedbytes.PairedInput(dumptb))
             else:
                 outputs = dumptext(typedbytes.PairedInput(dumptb))
             for output in outputs:
                 print '\t'.join(output)
             dumptb.close()
     except IOError:
         pass  # ignore
     return 0
Beispiel #2
0
 def cat(self, path, opts):
     addedopts = getopts(opts, ['libjar'], delete=False)
     streamingjar = findjar(self.hadoop, 'streaming')
     if not streamingjar:
         print >> sys.stderr, 'ERROR: Streaming jar not found'
         return 1
     hadenv = envdef('HADOOP_CLASSPATH', addedopts['libjar'],
                     shortcuts=dict(configopts('jars')))
     try:
         import typedbytes
         ls = os.popen('%s %s dfs -ls %s' % (hadenv, self.hdfs, path))
         if sum(c in path for c in ("*", "?", "{")) > 0:
             # cat each file separately when the path contains special chars
             lineparts = (line.split()[-1] for line in ls)
             subpaths = [part for part in lineparts if part.startswith("/")]
         else:
             # we still do the ls even in this case to make sure we print errors 
             subpaths = [path]
         ls.close()
         for subpath in subpaths:
             if subpath.endswith("/_logs"):
                 continue
             dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'
                               % (hadenv, self.hadoop, streamingjar, subpath))
             ascodeopt = getopt(opts, 'ascode')
             if ascodeopt and ascodeopt[0] == 'yes':
                 outputs = dumpcode(typedbytes.PairedInput(dumptb))
             else:
                 outputs = dumptext(typedbytes.PairedInput(dumptb))
             for output in outputs:
                 print '\t'.join(output)
             dumptb.close()
     except IOError:
         pass  # ignore
     return 0
Beispiel #3
0
def decodepipe(opts=None):
    opts = opts or Options()
    ofiles = opts.pop('file')
    files = map(open, ofiles) if ofiles else [sys.stdin]

    for _file in files:
        outputs = loadcode(line[:-1] for line in _file)
        for output in dumptext(outputs):
            print '\t'.join(output)
        _file.close()
        return 0
Beispiel #4
0
Datei: cmd.py Projekt: jso/dumbo
def decodepipe(opts=None):
    opts = opts or Options()
    ofiles = opts.pop('file')
    files = map(openFile, ofiles) if ofiles else [sys.stdin]

    for _file in files:
        outputs = loadcode(line[:-1] for line in _file)
        for output in dumptext(outputs):
            print '\t'.join(output)
        _file.close()
        return 0