def reduce_input(self, task, params):
    """Assemble the input object that is handed to the reduce phase.

    For every input returned by ``self.get_inputs()`` the URLs of its
    replicas are gathered into one list.  When those lists are reported
    as partitioned by ``ispartitioned`` and the ``merge_partitions``
    setting is falsy, the inputs are restricted to ``task.group_label``;
    otherwise no label is applied.

    :param task: task object; supplies ``group_label`` and is forwarded
                 to ``SerialInput`` and ``self.sort``.
    :param params: forwarded to ``self.opener('reduce', 'in', params)``.
    :return: whatever ``self.sort`` returns for the constructed input.
    """
    # NOTE: master should feed only the partitioned inputs to reduce
    # (and shuffle them?)
    from disco.worker import SerialInput
    from disco.util import inputlist, ispartitioned, shuffled

    # One list of replica URLs per input; the replica id is not needed.
    replica_urls = []
    for job_input in self.get_inputs():
        replica_urls.append([url for _rid, url in job_input.replicas])

    if ispartitioned(replica_urls) and not self['merge_partitions']:
        group_label = task.group_label
    else:
        group_label = None

    urls = shuffled(inputlist(replica_urls, label=group_label))
    reader = self.opener('reduce', 'in', params)
    stream = SerialInput(urls, task=task, open=reader)
    return self.sort(stream, task)
def open(url, task=None):
    """Wrap a single URL in a ``SerialInput``.

    :param url: the URL to expand with ``inputlist`` and read from.
    :param task: optional task; when truthy its ``group_label`` is passed
                 to ``inputlist`` as the label and the task itself is
                 handed to ``schemes.open_chain`` for every opened URL.
    :return: a ``SerialInput`` over the shuffled, expanded input list.
    """
    if task:
        label = task.group_label
    else:
        label = None

    def open_with_task(url):
        # Each resolved URL is opened with the same task context.
        return schemes.open_chain(url, task=task)

    expanded = inputlist([url], label=label)
    return SerialInput(shuffled(expanded), open=open_with_task)
def open(url, task=None):
    """Wrap a single URL in a ``SerialInput``.

    :param url: the URL to expand with ``inputlist`` and read from.
    :param task: optional task; when truthy, ``str(task.taskid)`` is
                 passed to ``inputlist`` as the partition and the task
                 itself is handed to ``schemes.open_chain`` for every
                 opened URL.
    :return: a ``SerialInput`` over the shuffled, expanded input list.
    """
    if task:
        partition = str(task.taskid)
    else:
        partition = None

    def open_with_task(url):
        # Each resolved URL is opened with the same task context.
        return schemes.open_chain(url, task=task)

    expanded = inputlist([url], partition=partition)
    return SerialInput(shuffled(expanded), open=open_with_task)