def prepare_input_map(self, task, stage, params):
    """Group this task's inputs by label, optionally sorting each group.

    Every input from ``self.get_inputs()`` is expanded with
    ``self.labelexpand`` into ``(label, input)`` pairs.  The pairs are
    collected under their label, or all under key ``0`` when
    ``stage.combine_labels`` is set.

    When ``stage.sort`` is set, each group is replaced by a one-element
    list: the ``merge`` of the ``merge_wrapper``-ed inputs if
    ``stage.input_sorted`` is set, otherwise the result of ``disk_sort``
    over the chained, shuffled inputs.

    Returns the label -> list-of-inputs mapping (a ``defaultdict`` when no
    sorting was requested, a plain ``dict`` otherwise).
    """
    # The input map maps a label to a sequence of inputs with that label.
    grouped = defaultdict(list)
    expanded = util.chainify(self.labelexpand(task, stage, inp, params)
                             for inp in self.get_inputs())
    for label, inp in expanded:
        key = 0 if stage.combine_labels else label
        grouped[key].append(inp)

    if not stage.sort:
        return grouped

    sorted_map = {}
    if stage.input_sorted:
        # NOTE(review): presumably each input is already sorted on its own,
        # so the inputs only need merging -- confirm against merge().
        for label, inputs in grouped.iteritems():
            wrapped = (merge_wrapper(src,
                                     sort_range=stage.sort,
                                     desc=stage.desc)
                       for src in inputs)
            sorted_map[label] = [merge(*wrapped)]
    else:
        for label, inputs in grouped.iteritems():
            stream = chainify(shuffled(inputs))
            sorted_input = disk_sort(stream,
                                     task.path('sort.dl'),
                                     sort_keys=stage.sort,
                                     sort_buffer_size='15%',
                                     binaries=stage.binaries,
                                     desc=stage.desc)
            sorted_map[label] = [sorted_input]
    return sorted_map
def reduce_input(self, task, params):
    """Build the input object handed to the reduce phase of ``task``.

    The replica URLs of every input are gathered into one list per input.
    If ``ispartitioned`` reports the inputs as partitioned and
    ``self['merge_partitions']`` is false, ``str(task.taskid)`` is passed
    to ``inputlist`` as ``partition``; otherwise ``partition`` is ``None``.
    The resulting list goes through ``shuffled`` and is wrapped in a
    ``SerialInput`` opened with the 'reduce'/'in' opener.  The wrapper is
    finally passed through ``self.sort``, whose result is returned.
    """
    # master should feed only the partitioned inputs to reduce (and shuffle them?)
    from disco.worker import SerialInput
    from disco.util import inputlist, ispartitioned, shuffled

    replica_urls = []
    for inp in self.get_inputs():
        replica_urls.append([url for _, url in inp.replicas])

    use_partition = ispartitioned(replica_urls) and not self['merge_partitions']
    partition = str(task.taskid) if use_partition else None

    urls = shuffled(inputlist(replica_urls, partition=partition))
    opener = self.opener('reduce', 'in', params)
    serial = SerialInput(urls, task=task, open=opener)
    return self.sort(serial, task)
def reduce_input(self, task, params):
    """Build the input object handed to the reduce phase of ``task``.

    The replica URLs of every input are gathered into one list per input.
    If ``ispartitioned`` reports the inputs as partitioned and
    ``self['merge_partitions']`` is false, ``task.group_label`` is passed
    to ``inputlist`` as ``label``; otherwise ``label`` is ``None``.  The
    resulting list goes through ``shuffled`` and is wrapped in a
    ``SerialInput`` opened with the 'reduce'/'in' opener.  The wrapper is
    finally passed through ``self.sort``, whose result is returned.
    """
    # master should feed only the partitioned inputs to reduce (and shuffle them?)
    from disco.worker import SerialInput
    from disco.util import inputlist, ispartitioned, shuffled

    replica_urls = []
    for inp in self.get_inputs():
        replica_urls.append([url for _, url in inp.replicas])

    use_label = ispartitioned(replica_urls) and not self['merge_partitions']
    label = task.group_label if use_label else None

    urls = shuffled(inputlist(replica_urls, label=label))
    opener = self.opener('reduce', 'in', params)
    serial = SerialInput(urls, task=task, open=opener)
    return self.sort(serial, task)
def reduce_input(self, task, params):
    """Build the input object handed to the reduce phase of ``task``.

    The replica URLs of every input are gathered into one list per input.
    If ``ispartitioned`` reports the inputs as partitioned,
    ``task.group_label`` is passed to ``inputlist`` as ``label``;
    otherwise ``label`` is ``None``.  The resulting list goes through
    ``shuffled`` and is wrapped in a ``SerialInput`` opened with the
    'reduce'/'in' opener.  The wrapper is finally passed through
    ``self.sort``, whose result is returned.
    """
    # master should feed only the partitioned inputs to reduce (and shuffle them?)
    from disco.worker import SerialInput
    from disco.util import inputlist, ispartitioned, shuffled

    replica_urls = []
    for inp in self.get_inputs():
        replica_urls.append([url for _, url in inp.replicas])

    label = task.group_label if ispartitioned(replica_urls) else None

    urls = shuffled(inputlist(replica_urls, label=label))
    opener = self.opener('reduce', 'in', params)
    serial = SerialInput(urls, task=task, open=opener)
    return self.sort(serial, task)
def test_changes(self):
    """Run a job while the cluster config is shrunk and then restored.

    Skipped when fewer than two nodes are available.  Otherwise the input
    is a shuffled mix of ``url://<node>`` entries (``max_workers * 2`` per
    node) and the integers ``0 .. num_workers - 1``.  After the job is
    started, the config is cut to its first two entries, then to its
    first entry, then restored in full, with a 5 second pause before each
    change.  Finally the job's answers are checked against the input.
    """
    if len(self.nodes) < 2:
        self.skipTest("Cannot test node changes with < 2 nodes")
        return

    local = []
    for node, max_workers in self.nodes.iteritems():
        local.extend('url://%s' % node for _ in xrange(max_workers * 2))
    inputs = shuffled(local + range(self.num_workers))

    self.job = ConfigJob().run(input=self.test_server.urls(inputs))

    # Shrink the config step by step while the job is running ...
    for size in (2, 1):
        time.sleep(5)
        self.disco.config = self.config[:size]
    # ... then restore the full config.
    time.sleep(5)
    self.disco.config = self.config

    self.checkAnswers(self.job, inputs)
def test_changes(self):
    """Run a job while the cluster config is shrunk and then restored.

    Skipped when fewer than two nodes are available.  Otherwise the input
    is a shuffled mix of ``url://<node>`` entries (``max_workers * 2`` per
    node) and the integers ``0 .. num_workers - 1``.  After the job is
    started, the config is cut to its first two entries, then to its
    first entry, then restored in full, with a 5 second pause before each
    change.  Finally the job's answers are checked against the input.
    """
    if len(self.nodes) < 2:
        self.skipTest("Cannot test node changes with < 2 nodes")
        return

    local = []
    for node, max_workers in self.nodes.items():
        local.extend('url://{0}'.format(node) for _ in range(max_workers * 2))
    inputs = shuffled(local + list(range(self.num_workers)))

    self.job = ConfigJob().run(input=self.test_server.urls(inputs))

    # Shrink the config step by step while the job is running ...
    for size in (2, 1):
        time.sleep(5)
        self.disco.config = self.config[:size]
    # ... then restore the full config.
    time.sleep(5)
    self.disco.config = self.config

    self.checkAnswers(self.job, inputs)
def map(string, params):
    """Return shuffled ``(key, '')`` pairs, one per element of ``string * 10``.

    Each key is ``base64.encodestring`` applied to one element of the
    input repeated ten times.  ``params`` is unused.
    """
    repeated = string * 10
    pairs = ((base64.encodestring(char), '') for char in repeated)
    return shuffled(pairs)
def open(url, task=None):
    """Wrap ``url`` in a ``SerialInput`` over its shuffled input list.

    ``task.group_label`` is passed to ``inputlist`` as ``label`` when a
    truthy ``task`` is given, ``None`` otherwise.  Individual URLs are
    opened through ``schemes.open_chain`` with the same ``task``.
    """
    if task:
        label = task.group_label
    else:
        label = None
    urls = shuffled(inputlist([url], label=label))
    return SerialInput(urls,
                       open=lambda url: schemes.open_chain(url, task=task))
def map(string, params):
    """Return shuffled ``(key, b'')`` pairs, one per character of the input.

    The input is repeated ten times and converted with ``bytes_to_str``;
    each character is converted back with ``str_to_bytes`` and
    base64-encoded to form a key.  ``params`` is unused.
    """
    # base64.encodestring was removed in Python 3.9; encodebytes is its
    # replacement but does not exist on Python 2, so fall back there.  The
    # lookup stays inside the function so the function remains
    # self-contained.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return shuffled((encode(str_to_bytes(c)), b'')
                    for c in bytes_to_str(string * 10))
def Map(interface, state, label, inp):
    """Add base64-encoded character keys for every entry of ``inp`` to output 0.

    Each entry is converted with ``str_to_bytes``, repeated ten times and
    converted back with ``bytes_to_str``.  Every character of the result
    becomes a ``(base64 key, b'')`` pair; the pairs for one entry are
    passed through ``shuffled`` and added to ``interface.output(0)``.
    ``state`` and ``label`` are unused.
    """
    out = interface.output(0)
    # base64.encodestring was removed in Python 3.9; encodebytes is its
    # replacement but does not exist on Python 2, so fall back there.
    # Resolved once, outside the loops.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    for i in inp:
        expanded = bytes_to_str(str_to_bytes(i) * 10)
        pairs = ((encode(str_to_bytes(c)), b'') for c in expanded)
        for k, v in shuffled(pairs):
            out.add(k, v)
def open(url, task=None):
    """Wrap ``url`` in a ``SerialInput`` over its shuffled input list.

    ``str(task.taskid)`` is passed to ``inputlist`` as ``partition`` when
    a truthy ``task`` is given, ``None`` otherwise.  Individual URLs are
    opened through ``schemes.open_chain`` with the same ``task``.
    """
    if task:
        partition = str(task.taskid)
    else:
        partition = None
    urls = shuffled(inputlist([url], partition=partition))
    return SerialInput(urls,
                       open=lambda url: schemes.open_chain(url, task=task))