def jobpack(self):
    """Return the local path of this job's jobpack file.

    The path comes from ``self.path('JOBPACK')``; its parent directory is
    created with ``ensure_path``.  ``ensure_file`` is given a callback that
    asks the master at ``self.master`` for the jobpack of ``self.jobname``.
    NOTE(review): presumably ``ensure_file`` only invokes the callback when
    the file does not exist yet -- confirm against its implementation.

    The ``mode`` handed to ``ensure_file`` is read-only for user, group and
    other (octal 0444).  It used to be the *decimal* literal ``444``, which
    is octal 0674 (rw-rwxr--) and almost certainly not what was intended.
    """
    # Function-scope import keeps this block self-contained.
    import stat

    def data():
        # Content source passed to ensure_file.
        return Disco(self.master).jobpack(self.jobname)

    # Spelled with stat constants so it is valid on every Python version
    # (a bare 0444 / 0o444 literal is not).
    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH

    jobpack = self.path('JOBPACK')
    ensure_path(os.path.dirname(jobpack))
    ensure_file(jobpack, data=data, mode=read_only)
    return jobpack
def download_and_sort(self, params):
    """Spool every reduce input into one file, sort it with the external
    ``sort`` program and return an iterator over the sorted result.

    Each (key, value) pair is written as ``"<key> <value>\\0"``: a space
    separates key from value and a NUL byte terminates the record.  For
    that reason keys containing a space and values containing a NUL byte
    are reported through ``err``.
    NOTE(review): ``err`` presumably aborts the task -- confirm; if it
    returns, the offending record is still written.

    :param params: passed through to ``connect_input`` and
                   ``multi_file_iterator``.
    :return: whatever ``self.multi_file_iterator`` returns for the sorted
             file, read back with a regex-based reader.
    """
    download_path = Task.path("REDUCE_DL", Task.id)
    msg("Reduce will be downloaded to %s" % download_path)

    spool = AtomicFile(download_path, "w")
    for input_url in self.inputs:
        fd, sze, url = connect_input(input_url, params)
        for key, value in fun_reader(fd, sze, url):
            if " " in key:
                err("Spaces are not allowed in keys with external sort.")
            if "\0" in value:
                err("Zero bytes are not allowed in values with external sort. "
                    "Consider using base64 encoding.")
            spool.write("%s %s\0" % (key, value))
    spool.close()
    msg("Reduce input downloaded ok")

    msg("Starting external sort")
    sorted_path = Task.path("REDUCE_SORTED", Task.id)
    ensure_path(os.path.dirname(sorted_path))
    # sort flags: -n numeric, -k 1,1 key is the first field only,
    # -z NUL-terminated records, -t " " space as field separator,
    # -o output file.
    sort_cmd = ["sort", "-n", "-k", "1,1", "-z", "-t", " ",
                "-o", sorted_path, download_path]
    status = subprocess.Popen(sort_cmd).wait()
    if status:
        err("Sorting %s to %s failed (%d)" % (download_path, sorted_path, status))
    msg("External sort done: %s" % sorted_path)

    def sorted_reader(fd, sze, url):
        # Split the sorted stream back into (key, value) records.
        return re_reader("(?s)(.*?) (.*?)\000", fd, sze, url)

    return self.multi_file_iterator([sorted_path], params, reader=sorted_reader)
def jobpack(self):
    """Return the local path of this job's jobpack, ``<jobroot>/jobpack.dl``.

    ``self.jobroot`` is created with ``ensure_path`` and ``ensure_file`` is
    given a callback that asks the master at ``self.master`` for the
    jobpack of ``self.jobname``.
    NOTE(review): presumably ``ensure_file`` only invokes the callback when
    the file does not exist yet -- confirm against its implementation.

    The ``mode`` handed to ``ensure_file`` is read-only for user, group and
    other (octal 0444).  It used to be the *decimal* literal ``444``, which
    is octal 0674 (rw-rwxr--) and almost certainly not what was intended.
    """
    # Function-scope import keeps this block self-contained.
    import stat

    def data():
        # Content source passed to ensure_file.
        return Disco(self.master).jobpack(self.jobname)

    # Spelled with stat constants so it is valid on every Python version
    # (a bare 0444 / 0o444 literal is not).
    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH

    jobpack = os.path.join(self.jobroot, 'jobpack.dl')
    ensure_path(self.jobroot)
    ensure_file(jobpack, data=data, mode=read_only)
    return jobpack
def run(self):
    """Prepare the task's working environment and execute the task.

    Steps, in order:
      1. assert that ``self.version`` equals the running interpreter's
         ``major.minor`` version;
      2. create the directory that holds the OOB file;
      3. change the working directory to ``self.path('CHDIR_PATH')``;
      4. write ``self.required_files`` under ``self.path('REQ_FILES')`` and
         put that directory first on ``sys.path``;
      5. call ``self.insert_globals(self.functions)``;
      6. run ``self._run_profile()`` when ``self.profile`` is truthy,
         otherwise ``self._run()``.

    :raises AssertionError: on an interpreter version mismatch.
    """
    interpreter_version = '%s.%s' % sys.version_info[:2]
    assert self.version == interpreter_version, "Python version mismatch"

    oob_dir = os.path.dirname(self.path('OOB_FILE', ''))
    ensure_path(oob_dir)
    os.chdir(self.path('CHDIR_PATH'))

    required_dir = self.path('REQ_FILES')
    write_files(self.required_files, required_dir)
    # Index 0 so the shipped files win over anything else on the path.
    sys.path.insert(0, required_dir)

    self.insert_globals(self.functions)

    entry_point = self._run_profile if self.profile else self._run
    entry_point()
def download_and_sort(self):
    """Spool every reduce input into one file, sort it with the external
    ``sort`` program and return an iterator over the sorted result.

    Records are written with ``self.sort_writer`` and later read back with
    ``self.sort_reader``.

    NOTE(review): on a non-zero exit status ``TaskFailed(...)`` is only
    constructed, not raised.  Unless constructing it aborts the task
    (``Message`` is used the same way, purely for its side effect),
    execution continues to the iterator below -- confirm.

    :return: ``self.multi_file_iterator(self.sort_reader, inputs=[sorted file])``
    """
    task = self.task

    download_path = task.path('REDUCE_DL', task.id)
    Message("Reduce will be downloaded to %s" % download_path)

    spool = AtomicFile(download_path, 'w')
    for input_url in self.inputs:
        records, _size, _url = task.connect_input(input_url)
        for key, value in records:
            self.sort_writer(spool, key, value)
    spool.close()
    Message("Reduce input downloaded ok")

    Message("Starting external sort")
    sorted_path = task.path('REDUCE_SORTED', task.id)
    ensure_path(os.path.dirname(sorted_path))
    # sort flags: -n numeric, -k 1,1 key is the first field only,
    # -T . temporary files in the current directory, -z NUL-terminated
    # records, -t '\xff' field separator, -o output file.
    sort_cmd = ['sort', '-n', '-k', '1,1', '-T', '.', '-z', '-t', '\xff',
                '-o', sorted_path, download_path]
    status = subprocess.Popen(sort_cmd).wait()
    if status:
        TaskFailed("Sorting %s to %s failed (%d)" % (download_path, sorted_path, status))
    Message("External sort done: %s" % sorted_path)

    return self.multi_file_iterator(self.sort_reader, inputs=[sorted_path])
def init(mode, host, master, job_name, id, inputs):
    """Create the module-global ``Task`` environment and enter its
    working directory.

    All six arguments are forwarded unchanged, in order, to
    ``TaskEnvironment``.  Afterwards the directory containing the OOB file
    is created and the process changes into ``Task.path('CHDIR_PATH')``.
    """
    global Task
    Task = TaskEnvironment(mode, host, master, job_name, id, inputs)

    oob_dir = os.path.dirname(Task.oob_file(''))
    ensure_path(oob_dir)

    workdir = Task.path('CHDIR_PATH')
    os.chdir(workdir)
def makedirs(self):
    """Create the directory ``self.taskpath`` via ``ensure_path``."""
    # Imported here, at call time, exactly as the original does.
    from disco.fileutils import ensure_path

    target = self.taskpath
    ensure_path(target)
def run(self):
    """Enter the task root, link in the library directory and execute
    the task.

    Steps, in order:
      1. assert that ``self.version`` equals the running interpreter's
         ``major.minor`` version;
      2. create ``self.taskroot`` and change into it;
      3. symlink ``self.lib`` as ``lib`` inside the task root;
      4. run ``self._run_profile()`` when ``self.profile`` is truthy,
         otherwise ``self._run()``.

    NOTE(review): ``os.symlink`` raises if ``lib`` already exists in the
    task root, e.g. when the same root is reused -- confirm this cannot
    happen.

    :raises AssertionError: on an interpreter version mismatch.
    """
    interpreter_version = '%s.%s' % sys.version_info[:2]
    assert self.version == interpreter_version, "Python version mismatch"

    ensure_path(self.taskroot)
    os.chdir(self.taskroot)
    # Relative link name: resolved against the task root entered above.
    os.symlink(self.lib, 'lib')

    if self.profile:
        self._run_profile()
    else:
        self._run()