def run(self, hadoop):
    """Push existing outputs through ``hadoop.put`` or run the step and fetch them.

    Each path in ``self.output`` is paired with its basename.

    * If there is at least one output and every output path already exists
      locally, ``hadoop.put(path, basename)`` is called for each pair and
      the method returns without running anything.
    * Otherwise ``hadoop.syscall(self.execute)`` is invoked, after which each
      pair is retrieved with ``hadoop.getmerge(basename, path)`` when
      ``self.stage == 'training'`` and ``hadoop.get(basename, path)``
      otherwise.

    :param hadoop: object providing ``put``, ``get``, ``getmerge`` and
        ``syscall`` (presumably a Hadoop command wrapper -- confirm against
        the caller).
    :return: ``None``
    """
    local_paths = self.output
    has_outputs = len(local_paths) > 0

    remote_names = []
    if has_outputs:
        remote_names = [os.path.basename(path) for path in local_paths]

    # Local path and remote name always travel together; the pairing is
    # consumed exactly once, on whichever branch is taken below.
    transfers = zip(local_paths, remote_names)

    absent = [path for path in local_paths if not os.path.exists(path)]
    if has_outputs and not absent:
        # Everything is already on local disk: push it instead of recomputing.
        for local, remote in transfers:
            hadoop.put(local, remote)
        return

    hadoop.syscall(self.execute)
    if not has_outputs:
        return

    for local, remote in transfers:
        fetch = hadoop.getmerge if self.stage == 'training' else hadoop.get
        fetch(remote, local)
def run(self, hadoop, outdir, tmpdir):
    """Push existing suffixed outputs through ``hadoop.put`` or run the step and fetch them.

    Every entry of ``self.output`` is extended with ``self.suffix`` to form
    the local path, and paired with its basename plus the same suffix.

    * If there is at least one output and every suffixed local path already
      exists, ``hadoop.put(local, remote)`` is called for each pair and the
      method returns without running anything.
    * Otherwise a file handle/name is obtained from ``self.realfile_``, the
      ``self.execute`` template is filled with ``file`` and ``suffix``, and
      the result is passed to ``hadoop.syscall``. Each pair is then retrieved
      with ``hadoop.getmerge(remote, local)`` when
      ``self.stage == 'training'`` and ``hadoop.get(remote, local)``
      otherwise, and finally ``self.cleanfile_`` is called.

    :param hadoop: object providing ``put``, ``get``, ``getmerge`` and
        ``syscall`` (presumably a Hadoop command wrapper -- confirm against
        the caller).
    :param outdir: forwarded unchanged to ``realfile_`` / ``cleanfile_``.
    :param tmpdir: forwarded unchanged to ``realfile_`` / ``cleanfile_``.
    :return: ``None``
    """
    local_paths = [o + self.suffix for o in self.output]
    remote_names = [os.path.basename(o) + self.suffix for o in self.output]

    # Local path and remote name always travel together. The loop variables
    # below must NOT be called ``os``: binding that name anywhere in this
    # function makes it local and breaks the ``os.path`` calls above with an
    # UnboundLocalError/NameError.
    transfers = list(zip(local_paths, remote_names))

    if transfers and all(os.path.exists(path) for path in local_paths):
        # Everything is already on local disk: push it instead of recomputing.
        for local, remote in transfers:
            hadoop.put(local, remote)
        return

    real_file = self.realfile_(hadoop, outdir, tmpdir)
    # NOTE(review): PTemplate looks like a string.Template-style class
    # (it exposes safe_substitute) -- confirm where it is imported.
    command = PTemplate(self.execute).safe_substitute(
        {'file': real_file, 'suffix': self.suffix}
    )
    hadoop.syscall(command)

    for local, remote in transfers:
        if self.stage == 'training':
            hadoop.getmerge(remote, local)
        else:
            hadoop.get(remote, local)

    self.cleanfile_(real_file, hadoop, outdir, tmpdir)