Exemple #1
0
 def jobpack(self):
     """Return the path ``self.path('JOBPACK')`` after materialising it.

     The parent directory of the path is passed to ``ensure_path`` and the
     path itself to ``ensure_file``, together with a callback that asks
     the master (``Disco(self.master)``) for the jobpack of
     ``self.jobname``.
     """
     def fetch():
         # Handed to ensure_file as a callable, not as pre-fetched data.
         return Disco(self.master).jobpack(self.jobname)

     target = self.path('JOBPACK')
     parent_dir = os.path.dirname(target)
     ensure_path(parent_dir)
     # NOTE(review): 444 is a decimal literal, not octal 0444 -- confirm
     # how ensure_file interprets ``mode``.
     ensure_file(target, data=fetch, mode=444)
     return target
Exemple #2
0
    def download_and_sort(self, params):
        """Spool every reduce input into one file and sort it externally.

        Each key/value pair read from ``self.inputs`` is written to the
        ``REDUCE_DL`` file as ``key``, a single space, ``value`` and a
        terminating NUL byte.  The external ``sort`` program then writes
        the ``REDUCE_SORTED`` file, and the result of
        ``self.multi_file_iterator`` over that file is returned.

        ``err`` is called when a key contains a space, when a value
        contains a NUL byte, or when ``sort`` exits with a non-zero status.
        """
        download_path = Task.path("REDUCE_DL", Task.id)
        msg("Reduce will be downloaded to %s" % download_path)
        sink = AtomicFile(download_path, "w")
        for source in self.inputs:
            fd, sze, url = connect_input(source, params)
            for key, value in fun_reader(fd, sze, url):
                # Space separates key from value and NUL terminates a
                # record, so neither may appear inside the data itself.
                if " " in key:
                    err("Spaces are not allowed in keys "
                        "with external sort.")
                if "\0" in value:
                    err("Zero bytes are not allowed in "
                        "values with external sort. "
                        "Consider using base64 encoding.")
                sink.write("%s %s\0" % (key, value))
        sink.close()
        msg("Reduce input downloaded ok")

        msg("Starting external sort")
        sorted_path = Task.path("REDUCE_SORTED", Task.id)
        ensure_path(os.path.dirname(sorted_path))
        # -z: NUL-terminated records, -t " ": space-delimited fields,
        # -n -k 1,1: numeric comparison on the first field only.
        sort_cmd = ["sort", "-n", "-k", "1,1", "-z",
                    "-t", " ", "-o", sorted_path, download_path]

        status = subprocess.Popen(sort_cmd).wait()
        if status:
            err("Sorting %s to %s failed (%d)" %
                (download_path, sorted_path, status))

        msg("External sort done: %s" % sorted_path)

        def sorted_reader(fd, sze, url):
            # Splits the sorted file back into (key, value) records.
            return re_reader("(?s)(.*?) (.*?)\000", fd, sze, url)

        return self.multi_file_iterator([sorted_path], params,
                                        reader=sorted_reader)
Exemple #3
0
 def jobpack(self):
     """Return the path of ``jobpack.dl`` inside ``self.jobroot``.

     ``self.jobroot`` is passed to ``ensure_path`` and the file path to
     ``ensure_file``, along with a callback that requests the jobpack of
     ``self.jobname`` from ``Disco(self.master)``.
     """
     local_copy = os.path.join(self.jobroot, 'jobpack.dl')
     ensure_path(self.jobroot)

     def fetch():
         # Supplied to ensure_file as a callable rather than as data.
         return Disco(self.master).jobpack(self.jobname)

     # NOTE(review): 444 is decimal here, not octal 0444 -- confirm what
     # ensure_file does with ``mode``.
     ensure_file(local_copy, data=fetch, mode=444)
     return local_copy
Exemple #4
0
 def jobpack(self):
     """Make sure ``jobpack.dl`` exists under ``self.jobroot``; return its path.

     The directory goes through ``ensure_path``; the file goes through
     ``ensure_file`` with a data callback that calls
     ``Disco(self.master).jobpack(self.jobname)``.
     """
     destination = os.path.join(self.jobroot, 'jobpack.dl')
     ensure_path(self.jobroot)

     def download():
         # Given to ensure_file as a callable, not as fetched bytes.
         return Disco(self.master).jobpack(self.jobname)

     # NOTE(review): mode is the decimal integer 444 (not 0444 octal);
     # verify against ensure_file's expectations.
     ensure_file(destination, data=download, mode=444)
     return destination
Exemple #5
0
 def run(self):
     """Prepare the working environment and execute the task.

     Asserts that ``self.version`` equals the running interpreter's
     ``major.minor`` version, passes the directory of the OOB file to
     ``ensure_path``, changes into ``CHDIR_PATH``, writes
     ``self.required_files`` into ``REQ_FILES`` and puts that directory
     first on ``sys.path``, calls ``self.insert_globals`` with
     ``self.functions``, and finally calls ``_run_profile`` or ``_run``
     depending on ``self.profile``.
     """
     interpreter_version = '%s.%s' % sys.version_info[:2]
     assert self.version == interpreter_version, "Python version mismatch"

     oob_dir = os.path.dirname(self.path('OOB_FILE', ''))
     ensure_path(oob_dir)
     os.chdir(self.path('CHDIR_PATH'))

     req_dir = self.path('REQ_FILES')
     write_files(self.required_files, req_dir)
     # Front of sys.path so the freshly written files are found first.
     sys.path.insert(0, req_dir)
     self.insert_globals(self.functions)

     entry_point = self._run_profile if self.profile else self._run
     entry_point()
Exemple #6
0
    def download_and_sort(self):
        """Spool every reduce input into one file and sort it externally.

        Each key/value pair obtained from ``self.inputs`` is handed to
        ``self.sort_writer`` together with the ``REDUCE_DL`` output file.
        The external ``sort`` program then writes the ``REDUCE_SORTED``
        file, and the result of ``self.multi_file_iterator`` reading that
        file with ``self.sort_reader`` is returned.
        """
        task = self.task
        download_path = task.path('REDUCE_DL', task.id)
        Message("Reduce will be downloaded to %s" % download_path)
        sink = AtomicFile(download_path, 'w')
        for source in self.inputs:
            # Only the reader of the returned triple is used here.
            records, _size, _url = self.task.connect_input(source)
            for key, value in records:
                self.sort_writer(sink, key, value)
        sink.close()
        Message("Reduce input downloaded ok")

        Message("Starting external sort")
        sorted_path = self.task.path('REDUCE_SORTED', self.task.id)
        ensure_path(os.path.dirname(sorted_path))
        # -z: NUL-terminated records, -t '\xff': field delimiter,
        # -n -k 1,1: numeric comparison on the first field,
        # -T .: temporary files go to the current directory.
        sort_cmd = ['sort', '-n', '-k', '1,1', '-T', '.',
                    '-z', '-t', '\xff', '-o', sorted_path, download_path]

        status = subprocess.Popen(sort_cmd).wait()
        if status:
            # NOTE(review): TaskFailed is constructed, not raised -- the
            # same call style as Message above; confirm this is how a
            # failure is meant to be reported.
            TaskFailed("Sorting %s to %s failed (%d)"
                       % (download_path, sorted_path, status))

        Message("External sort done: %s" % sorted_path)
        return self.multi_file_iterator(self.sort_reader,
                                        inputs=[sorted_path])
Exemple #7
0
def init(mode, host, master, job_name, id, inputs):
    """Create the module-global ``Task`` and enter its working directory.

    All arguments are forwarded unchanged to ``TaskEnvironment``.  The
    directory of ``Task.oob_file('')`` is then passed to ``ensure_path``
    and the process changes into ``Task.path('CHDIR_PATH')``.
    """
    global Task
    Task = TaskEnvironment(mode, host, master, job_name, id, inputs)

    oob_dir = os.path.dirname(Task.oob_file(''))
    ensure_path(oob_dir)

    workdir = Task.path('CHDIR_PATH')
    os.chdir(workdir)
Exemple #8
0
 def makedirs(self):
     """Pass ``self.taskpath`` to ``disco.fileutils.ensure_path``.

     Presumably this creates the directory when it is missing; confirm
     against ``ensure_path``.
     """
     # Imported inside the method, so disco.fileutils loads on first call.
     from disco.fileutils import ensure_path as _ensure_path
     _ensure_path(self.taskpath)
Exemple #9
0
 def makedirs(self):
     """Hand ``self.taskpath`` to ``ensure_path`` from ``disco.fileutils``.

     NOTE(review): expected to create the task directory if absent --
     verify in ``disco.fileutils``.
     """
     # Function-scope import: the module is only loaded when this runs.
     from disco.fileutils import ensure_path as _make_path
     _make_path(self.taskpath)
Exemple #10
0
 def run(self):
     """Enter the task root, link the library directory and run the task.

     Asserts that ``self.version`` equals the running interpreter's
     ``major.minor`` version, passes ``self.taskroot`` to ``ensure_path``,
     changes into it, creates a symlink named ``lib`` pointing at
     ``self.lib``, then calls ``_run_profile`` when ``self.profile`` is
     truthy and ``_run`` otherwise.
     """
     interpreter_version = '%s.%s' % sys.version_info[:2]
     assert self.version == interpreter_version, "Python version mismatch"

     ensure_path(self.taskroot)
     os.chdir(self.taskroot)
     # Relative link name: it is created inside the task root entered above.
     os.symlink(self.lib, 'lib')

     if self.profile:
         self._run_profile()
     else:
         self._run()