def cat(self, path, opts):
    """Print the typed-bytes records stored under ``path`` as text.

    The path is first listed with ``hadoop dfs -ls``; every resulting
    sub-path is then fed through the streaming jar's ``dumptb`` command and
    each decoded record is printed as one tab-joined line on stdout.

    Args:
        path: DFS path to dump.  When it contains ``*``, ``?`` or ``{`` each
            absolute path reported by the listing is dumped separately.
        opts: option list; ``libjar`` is read (without being removed) and
            ``ascode`` selects ``dumpcode`` over ``dumptext`` when its first
            value is ``'yes'``.

    Returns:
        1 when the streaming jar cannot be found, 0 otherwise.  An IOError
        raised while listing or dumping is deliberately swallowed.
    """
    addedopts = getopts(opts, ['libjar'], delete=False)
    streamingjar = findjar(self.hadoop, 'streaming')
    if not streamingjar:
        sys.stderr.write('ERROR: Streaming jar not found\n')
        return 1
    hadenv = envdef('HADOOP_CLASSPATH', addedopts['libjar'],
                    shortcuts=dict(configopts('jars')))
    try:
        import typedbytes
        # NOTE(review): path is interpolated into a shell command unquoted;
        # confirm it never comes from an untrusted source.
        ls = os.popen('%s %s/bin/hadoop dfs -ls %s'
                      % (hadenv, self.hadoop, path))
        try:
            if any(c in path for c in ("*", "?", "{")):
                # cat each file separately when the path contains special
                # chars; blank listing lines are skipped instead of raising
                # IndexError on split()[-1]
                subpaths = []
                for line in ls:
                    parts = line.split()
                    if parts and parts[-1].startswith("/"):
                        subpaths.append(parts[-1])
            else:
                # we still do the ls even in this case to make sure we
                # print errors
                subpaths = [path]
        finally:
            # release the pipe even when reading the listing fails
            ls.close()
        # Resolve the output format once so that every sub-path is dumped
        # the same way.  NOTE(review): getopt is called without
        # delete=False, so it presumably consumes the option on first use --
        # confirm against its definition.
        ascodeopt = getopt(opts, 'ascode')
        dump = dumpcode if ascodeopt and ascodeopt[0] == 'yes' else dumptext
        for subpath in subpaths:
            dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'
                              % (hadenv, self.hadoop, streamingjar, subpath))
            try:
                for output in dump(typedbytes.PairedInput(dumptb)):
                    # a single parenthesised argument prints identically
                    # under the Python 2 print statement
                    print('\t'.join(output))
            finally:
                dumptb.close()
    except IOError:
        pass  # ignore
    return 0
def cat(self, path, opts):
    """Print the typed-bytes records stored under ``path`` as text.

    The path is listed with ``<self.hdfs> dfs -ls``; each sub-path (except
    those ending in ``/_logs``) is decoded by running the streaming jar's
    ``dumptb`` command through ``<self.hadoop>/bin/hadoop jar`` and every
    record is printed as one tab-joined line on stdout.

    Args:
        path: DFS path to dump.  When it contains ``*``, ``?`` or ``{`` each
            absolute path reported by the listing is dumped separately.
        opts: option list; ``libjar`` is read (without being removed) and
            ``ascode`` selects ``dumpcode`` over ``dumptext`` when its first
            value is ``'yes'``.

    Returns:
        1 when the streaming jar cannot be found, 0 otherwise.  An IOError
        raised while listing or dumping is deliberately swallowed.
    """
    addedopts = getopts(opts, ['libjar'], delete=False)
    streamingjar = findjar(self.hadoop, 'streaming')
    if not streamingjar:
        sys.stderr.write('ERROR: Streaming jar not found\n')
        return 1
    hadenv = envdef('HADOOP_CLASSPATH', addedopts['libjar'],
                    shortcuts=dict(configopts('jars')))
    try:
        import typedbytes
        # NOTE(review): path is interpolated into a shell command unquoted;
        # confirm it never comes from an untrusted source.
        ls = os.popen('%s %s dfs -ls %s' % (hadenv, self.hdfs, path))
        try:
            if any(c in path for c in ("*", "?", "{")):
                # cat each file separately when the path contains special
                # chars; blank listing lines are skipped instead of raising
                # IndexError on split()[-1]
                subpaths = []
                for line in ls:
                    parts = line.split()
                    if parts and parts[-1].startswith("/"):
                        subpaths.append(parts[-1])
            else:
                # we still do the ls even in this case to make sure we
                # print errors
                subpaths = [path]
        finally:
            # release the pipe even when reading the listing fails
            ls.close()
        # Resolve the output format once so that every sub-path is dumped
        # the same way.  NOTE(review): getopt is called without
        # delete=False, so it presumably consumes the option on first use --
        # confirm against its definition.
        ascodeopt = getopt(opts, 'ascode')
        dump = dumpcode if ascodeopt and ascodeopt[0] == 'yes' else dumptext
        for subpath in subpaths:
            if subpath.endswith("/_logs"):
                # presumably the job's log directory, which holds no
                # typed-bytes output -- verify
                continue
            dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'
                              % (hadenv, self.hadoop, streamingjar, subpath))
            try:
                for output in dump(typedbytes.PairedInput(dumptb)):
                    # a single parenthesised argument prints identically
                    # under the Python 2 print statement
                    print('\t'.join(output))
            finally:
                dumptb.close()
    except IOError:
        pass  # ignore
    return 0
def encodepipe(opts=None):
    """Read records from files (or stdin) and print them in "code" form.

    The ``addpath``, ``file`` and ``alreadycoded`` options are read from
    ``opts`` and then removed from it.  Each input line, minus its trailing
    newline, is parsed with ``loadcode`` (when ``alreadycoded`` is set) or
    ``loadtext``; the resulting pairs are passed through ``dumpcode`` and
    printed as tab-joined lines.  With ``addpath`` set, every key is
    replaced by ``(file name, key)``.

    Args:
        opts: options object; a fresh ``Options()`` is used when falsy.

    Returns:
        0 on completion.
    """
    opts = opts or Options()
    keys = ['addpath', 'file', 'alreadycoded']
    addedopts = opts.filter(keys)
    opts.remove(*keys)
    ofiles = addedopts['file']
    files = [open(name) for name in ofiles] if ofiles else [sys.stdin]
    loadfun = loadcode if addedopts['alreadycoded'] else loadtext
    addpath = addedopts['addpath']
    try:
        for _file in files:
            # Strip only an actual trailing newline: a blind line[:-1] would
            # eat a real character from a final line without one.
            lines = (line[:-1] if line.endswith('\n') else line
                     for line in _file)
            outputs = loadfun(lines)
            if addpath:
                outputs = (((_file.name, key), value)
                           for (key, value) in outputs)
            for output in dumpcode(outputs):
                # a single parenthesised argument prints identically under
                # the Python 2 print statement
                print('\t'.join(output))
            _file.close()
    finally:
        # On error, make sure no input is left open; closing an already
        # closed file is a no-op.
        for _file in files:
            _file.close()
    return 0
def encodepipe(opts=None):
    """Read records from files (or stdin) and print them in "code" form.

    The ``addpath``, ``file`` and ``alreadycoded`` options are read from
    ``opts`` and then removed from it.  Inputs named by ``file`` are opened
    with ``openFile``; without any, stdin is read.  Each input line, minus
    its trailing newline, is parsed with ``loadcode`` (when ``alreadycoded``
    is set) or ``loadtext``; the resulting pairs are passed through
    ``dumpcode`` and printed as tab-joined lines.  With ``addpath`` set,
    every key is replaced by ``(file name, key)``.

    Args:
        opts: options object; a fresh ``Options()`` is used when falsy.

    Returns:
        0 on completion.
    """
    opts = opts or Options()
    keys = ['addpath', 'file', 'alreadycoded']
    addedopts = opts.filter(keys)
    opts.remove(*keys)
    ofiles = addedopts['file']
    files = [openFile(name) for name in ofiles] if ofiles else [sys.stdin]
    loadfun = loadcode if addedopts['alreadycoded'] else loadtext
    addpath = addedopts['addpath']
    try:
        for _file in files:
            # Strip only an actual trailing newline: a blind line[:-1] would
            # eat a real character from a final line without one.
            lines = (line[:-1] if line.endswith('\n') else line
                     for line in _file)
            outputs = loadfun(lines)
            if addpath:
                outputs = (((_file.name, key), value)
                           for (key, value) in outputs)
            for output in dumpcode(outputs):
                # a single parenthesised argument prints identically under
                # the Python 2 print statement
                print('\t'.join(output))
            _file.close()
    finally:
        # On error, make sure no input is left open.
        # NOTE(review): assumes close() on an already-closed openFile
        # result is harmless, as it is for built-in file objects -- confirm.
        for _file in files:
            _file.close()
    return 0
def dotest(self,data):
    """Assert that ``data`` survives a dumpcode/loadcode round trip.

    ``data`` is dumped as the value of a ("dummy", data) pair, the dumped
    fields are joined with tabs into one line, and the value loaded back
    from that line must equal ``data``.
    """
    dumped_fields = util.dumpcode([("dummy", data)]).next()
    encoded_line = "\t".join(dumped_fields)
    loaded_pair = util.loadcode([encoded_line]).next()
    self.assertEqual(loaded_pair[1], data)
def dotest(self, data):
    """Round-trip ``data`` through util.dumpcode and util.loadcode.

    The value is paired with the placeholder key "dummy", dumped, joined
    into a single tab-separated line, loaded again, and compared with the
    original value.
    """
    record = ("dummy", data)
    fields = util.dumpcode([record]).next()
    line = "\t".join(fields)
    restored = util.loadcode([line]).next()
    # index 1 is the value slot of the reloaded pair; index 0 is its key
    self.assertEqual(restored[1], data)