def _execute_to_output(self):
    """Run the task into an intermediate dir, then move it to ``self.output``.

    Steps, in order:

    1. Return immediately if ``self.output`` already exists.
    2. Remove anything matching ``<output>_wip*`` left by an earlier run.
    3. Set ``self.execargs`` to target the intermediate dir and run the
       task through ``PythonTask.execute``.
    4. Check the intermediate dir with ``job_succeeded`` and, if it passes,
       call ``hdfs.mv`` to publish it as the final output.

    Raises:
        AssertionError: if ``job_succeeded`` reports the intermediate
            output as invalid. Nothing is moved in that case.
    """
    # Skip the task entirely if the final output already exists.
    if hdfs.path_exists(self.output):
        self.log('Output path already exists %s', self.output)
        return

    # Intermediate ("work in progress") dir, derived from the final path
    # with any trailing slashes stripped.
    wip = '%s_wip' % self.output.rstrip('/')

    # A truthy result from hdfs.dus is treated as "stale outputs exist"
    # (presumably a disk-usage query -- confirm against the hdfs helper).
    if hdfs.dus(wip + '*'):
        self.log('Removing intermediate outputs found under %s*', wip)
        hdfs.rmr(wip + '*')
        sleep(3)  # give hdfs a chance to remove the dir before the job recreates it

    # Compute the program arguments against the wip dir and run the program.
    self.execargs = self._execargs(output=wip)
    PythonTask.execute(self)

    # Validate the intermediate output before publishing it. This is an
    # explicit raise rather than an `assert` statement so the check still
    # runs under `python -O`; the exception type and message are unchanged.
    if not job_succeeded(wip):
        raise AssertionError('Intermediate output is invalid, check %s' % wip)

    self.log('wip dir output is valid, moving to %s', self.output)
    # NOTE(review): arguments are passed as (self.output, wip); confirm that
    # hdfs.mv expects (destination, source) and not the reverse.
    hdfs.mv(self.output, wip)