def result_iterator(results, notifier=None, reader=func.chain_reader,
                    input_stream=(func.map_input_stream, ), params=None,
                    ddfs=None, tempdir=None):
    """
    Iterates the key-value pairs in job results. *results* is a list of
    results, as returned by :meth:`Disco.wait`.

    :param notifier: a function called when the iterator moves to the
                     next result file::

                         def notifier(url):
                             ...

                     *url* may be a list if results are replicated.

    :param reader: a custom reader function. Specify this to match with a
                   custom *map_writer* or *reduce_writer*. By default,
                   *reader* is :func:`disco.func.chain_reader`. If *reader*
                   is false (e.g. ``None``), no reader is appended to the
                   input stream.

    :param input_stream: a sequence of input stream functions. It is copied
                         into a list and, when *reader* is set, the wrapped
                         reader is appended as the last element.

    :param params: stored as the ``params`` attribute of the task object
                   that is used to open the result files.

    :param ddfs: passed through to :func:`util.urllist` when each result is
                 expanded into urls.

    :param tempdir: if results are replicated, *result_iterator* ensures
                    that only valid replicas are used. By default, this is
                    done by downloading and parsing results first to a
                    temporary file. If the temporary file was created
                    successfully, the results are returned, otherwise an
                    alternative replica is used.

                    If *tempdir=None* (default), the system default
                    temporary directory is used (typically ``/tmp``). An
                    alternative path can be set with *tempdir="path"*.
                    Temporary files can be disabled with *tempdir=False*,
                    in which case results are read in memory.
    """
    from disco.task import Task

    task = Task()
    task.params = params
    # Copy so that the caller's sequence (and the shared default) is never
    # mutated by the append below.
    task.input_stream = list(input_stream)
    if reader:
        task.input_stream.append(func.reader_wrapper(reader))
    task.insert_globals(task.input_stream)

    for result in results:
        for url in util.urllist(result, ddfs=ddfs):
            if notifier:
                notifier(url)
            # A list of urls denotes replicas of the same result; those go
            # through the replica-aware path. isinstance() also accepts
            # list subclasses, unlike an exact type comparison.
            if isinstance(url, list):
                entries = process_url_safe(url, tempdir, task)
            else:
                # connect_input returns a 3-tuple; only the iterator
                # (first element) is needed here.
                entries, _size, _url = task.connect_input(url)
            for entry in entries:
                yield entry
def init(mode, host, master, job_name, id, inputs):
    """
    Set up the module-level ``Task`` environment and enter its directory.

    Builds a :class:`TaskEnvironment` from the given arguments and binds it
    to the global name ``Task``. It then calls ``ensure_path`` on the
    directory that holds the task's out-of-band files (presumably creating
    it if missing -- confirm against ``ensure_path``) and finally changes
    the process working directory to ``Task.path('CHDIR_PATH')``.
    """
    global Task
    Task = TaskEnvironment(mode, host, master, job_name, id, inputs)

    # oob_file('') yields a path inside the out-of-band directory; its
    # dirname is the directory itself.
    oob_dir = os.path.dirname(Task.oob_file(''))
    ensure_path(oob_dir)

    workdir = Task.path('CHDIR_PATH')
    os.chdir(workdir)
def get_task(cls):
    """
    Request the task description via ``cls.send('TASK')`` and return it as
    a :class:`disco.task.Task`.

    The reply is treated as a mapping; every key is converted with
    ``str()`` so that it can be passed as a keyword argument to the
    ``Task`` constructor.
    """
    from disco.task import Task

    kwargs = {}
    for key, value in cls.send('TASK').items():
        # Keyword argument names must be plain strings.
        kwargs[str(key)] = value
    return Task(**kwargs)