Ejemplo n.º 1
0
    def prepare_input_map(self, task, stage, params):
        """Group this task's inputs by label and optionally sort each group.

        Each input from ``self.get_inputs()`` is expanded through
        ``self.labelexpand`` into ``(label, input)`` pairs.  When
        ``stage.combine_labels`` is set, every input is collected under the
        single key ``0``; otherwise inputs are grouped under their own label.

        When ``stage.sort`` is truthy, each group is replaced by a
        one-element list: a ``merge`` of the wrapped inputs if
        ``stage.input_sorted`` is set, otherwise the result of ``disk_sort``
        over the shuffled, chained inputs.

        Returns the mapping from label to list of inputs.
        """
        grouped = defaultdict(list)

        expanded = util.chainify(self.labelexpand(task, stage, inp, params)
                                 for inp in self.get_inputs())
        for label, inp in expanded:
            key = 0 if stage.combine_labels else label
            grouped[key].append(inp)

        if not stage.sort:
            return grouped

        by_label = {}
        presorted = stage.input_sorted
        for label, inputs in grouped.iteritems():
            if presorted:
                # Inputs are already sorted: wrap each one and merge them.
                wrapped = [merge_wrapper(inp,
                                         sort_range=stage.sort,
                                         desc=stage.desc)
                           for inp in inputs]
                by_label[label] = [merge(*wrapped)]
            else:
                # NOTE(review): every label is sorted through the same
                # 'sort.dl' path -- confirm this is safe with several labels.
                stream = chainify(shuffled(inputs))
                by_label[label] = [disk_sort(stream,
                                             task.path('sort.dl'),
                                             sort_keys=stage.sort,
                                             sort_buffer_size='15%',
                                             binaries=stage.binaries,
                                             desc=stage.desc)]
        return by_label
Ejemplo n.º 2
0
    def prepare_input_map(self, task, stage, params):
        """Build the label -> inputs mapping for a stage, sorting on request.

        The pairs produced by ``self.labelexpand`` for every input of
        ``self.get_inputs()`` are bucketed by label, or all under key ``0``
        when ``stage.combine_labels`` is set.

        If ``stage.sort`` is truthy, every bucket is collapsed into a
        single-element list holding either a ``merge`` of the wrapped inputs
        (``stage.input_sorted`` set) or a ``disk_sort`` of the shuffled,
        chained inputs.
        """
        buckets = defaultdict(list)

        pair_groups = (self.labelexpand(task, stage, item, params)
                       for item in self.get_inputs())
        for lbl, item in util.chainify(pair_groups):
            if stage.combine_labels:
                lbl = 0
            buckets[lbl].append(item)

        if stage.sort:
            sorted_buckets = {}
            if stage.input_sorted:
                for lbl, items in buckets.iteritems():
                    # Already-sorted inputs only need wrapping and merging.
                    wrappers = [merge_wrapper(item,
                                              sort_range=stage.sort,
                                              desc=stage.desc)
                                for item in items]
                    sorted_buckets[lbl] = [merge(*wrappers)]
            else:
                for lbl, items in buckets.iteritems():
                    chained = chainify(shuffled(items))
                    ordered = disk_sort(chained,
                                        task.path('sort.dl'),
                                        sort_keys=stage.sort,
                                        sort_buffer_size='15%',
                                        binaries=stage.binaries,
                                        desc=stage.desc)
                    sorted_buckets[lbl] = [ordered]
            buckets = sorted_buckets
        return buckets
Ejemplo n.º 3
0
 def reduce_input(self, task, params):
     """Return the sorted serial input for a reduce task.

     Collects the replica URLs of every input, restricts them to the
     partition named by ``task.taskid`` when the inputs are partitioned and
     ``self['merge_partitions']`` is falsy, shuffles the resulting input
     list and passes it through ``self.sort``.
     """
     # master should feed only the partitioned inputs to reduce (and shuffle them?)
     from disco.worker import SerialInput
     from disco.util import inputlist, ispartitioned, shuffled

     replica_urls = [[url for rid, url in inp.replicas]
                     for inp in self.get_inputs()]
     keep_partition = (ispartitioned(replica_urls)
                       and not self['merge_partitions'])
     partition = str(task.taskid) if keep_partition else None

     urls = shuffled(inputlist(replica_urls, partition=partition))
     opener = self.opener('reduce', 'in', params)
     serial = SerialInput(urls, task=task, open=opener)
     return self.sort(serial, task)
Ejemplo n.º 4
0
 def reduce_input(self, task, params):
     """Return the sorted serial input for a reduce task.

     The replica URLs of all inputs are gathered; if they are partitioned
     and ``self['merge_partitions']`` is falsy, only entries for
     ``task.group_label`` are requested from ``inputlist``.  The shuffled
     list is wrapped in a ``SerialInput`` and handed to ``self.sort``.
     """
     # master should feed only the partitioned inputs to reduce (and shuffle them?)
     from disco.worker import SerialInput
     from disco.util import inputlist, ispartitioned, shuffled

     url_groups = []
     for inp in self.get_inputs():
         url_groups.append([url for rid, url in inp.replicas])

     selected = None
     if ispartitioned(url_groups) and not self['merge_partitions']:
         selected = task.group_label

     stream = SerialInput(
         shuffled(inputlist(url_groups, label=selected)),
         task=task,
         open=self.opener('reduce', 'in', params),
     )
     return self.sort(stream, task)
Ejemplo n.º 5
0
 def reduce_input(self, task, params):
     """Return the sorted serial input for a reduce task.

     Gathers the replica URLs of every input and, when they are
     partitioned, limits the input list to ``task.group_label``.  The
     shuffled list is opened through the 'reduce'/'in' opener and sorted
     with ``self.sort``.
     """
     # master should feed only the partitioned inputs to reduce (and shuffle them?)
     from disco.worker import SerialInput
     from disco.util import inputlist, ispartitioned, shuffled

     per_input_urls = [[url for rid, url in source.replicas]
                       for source in self.get_inputs()]
     label = task.group_label if ispartitioned(per_input_urls) else None

     shuffled_urls = shuffled(inputlist(per_input_urls, label=label))
     reader = SerialInput(shuffled_urls,
                          task=task,
                          open=self.opener('reduce', 'in', params))
     return self.sort(reader, task)
Ejemplo n.º 6
0
 def test_changes(self):
     """Run a job while the node configuration shrinks and is then restored.

     Skipped when fewer than two nodes are available.  The job input mixes
     two 'url://<node>' entries per worker slot of every node with the
     integers below ``self.num_workers``, in shuffled order.
     """
     if len(self.nodes) < 2:
         self.skipTest("Cannot test node changes with < 2 nodes")
         return

     node_urls = []
     for node, max_workers in self.nodes.iteritems():
         for _ in xrange(max_workers * 2):
             node_urls.append('url://%s' % node)

     inputs = shuffled(node_urls + range(self.num_workers))
     self.job = ConfigJob().run(input=self.test_server.urls(inputs))

     # Shrink the configuration to two entries, then one, pausing before
     # each change, and finally restore the full configuration.
     for keep in (2, 1):
         time.sleep(5)
         self.disco.config = self.config[:keep]
     time.sleep(5)
     self.disco.config = self.config
     self.checkAnswers(self.job, inputs)
Ejemplo n.º 7
0
 def test_changes(self):
     """Run a job while the node configuration shrinks and is then restored.

     Skipped when fewer than two nodes are available.  The job input mixes
     two 'url://<node>' entries per worker slot of every node with the
     integers below ``self.num_workers``, in shuffled order.
     """
     if len(self.nodes) < 2:
         self.skipTest("Cannot test node changes with < 2 nodes")
         return

     node_urls = []
     for node, max_workers in self.nodes.items():
         for _ in range(max_workers * 2):
             node_urls.append('url://{0}'.format(node))

     worker_ids = list(range(self.num_workers))
     inputs = shuffled(node_urls + worker_ids)
     self.job = ConfigJob().run(input=self.test_server.urls(inputs))

     # Shrink the configuration to two entries, then one, pausing before
     # each change, and finally restore the full configuration.
     for keep in (2, 1):
         time.sleep(5)
         self.disco.config = self.config[:keep]
     time.sleep(5)
     self.disco.config = self.config
     self.checkAnswers(self.job, inputs)
Ejemplo n.º 8
0
 def map(string, params):
     """Return shuffled ``(key, '')`` pairs for ``string`` repeated ten times.

     Each key is the base64 encoding of one element of the repeated string.
     ``params`` is not used.
     """
     repeated = string * 10
     pairs = ((base64.encodestring(char), '') for char in repeated)
     return shuffled(pairs)
Ejemplo n.º 9
0
def open(url, task=None):
    """Open ``url`` as a ``SerialInput`` over its shuffled input list.

    When ``task`` is truthy its ``group_label`` is passed to ``inputlist``
    as the label; otherwise the label is ``None``.  Individual URLs are
    opened with ``schemes.open_chain`` for the same task.
    """
    if task:
        label = task.group_label
    else:
        label = None

    sources = shuffled(inputlist([url], label=label))
    return SerialInput(sources,
                       open=lambda url: schemes.open_chain(url, task=task))
Ejemplo n.º 10
0
 def map(string, params):
     """Return shuffled ``(key, b'')`` pairs for ``string`` repeated ten times.

     The repeated string is passed through ``bytes_to_str`` and iterated;
     each element is converted with ``str_to_bytes`` and base64-encoded to
     form the key.  ``params`` is not used.
     """
     # base64.encodestring was removed in Python 3.9; encodebytes replaces it
     # on Python 3, while Python 2 only provides encodestring.
     encode = getattr(base64, 'encodebytes', None) or base64.encodestring
     return shuffled((encode(str_to_bytes(c)), b'')
                     for c in bytes_to_str(string * 10))
Ejemplo n.º 11
0
def open(url, task=None):
    """Return a ``SerialInput`` reading the shuffled input list of ``url``.

    The input list is restricted to ``task.group_label`` when a truthy
    ``task`` is given, and is unrestricted (label ``None``) otherwise.
    """
    label = None
    if task:
        label = task.group_label

    def open_chain(url):
        # Open one URL of the input list on behalf of the same task.
        return schemes.open_chain(url, task=task)

    return SerialInput(shuffled(inputlist([url], label=label)),
                       open=open_chain)
Ejemplo n.º 12
0
def Map(interface, state, label, inp):
    """Emit shuffled ``(key, b'')`` pairs for every item of ``inp``.

    Each item is converted with ``str_to_bytes``, repeated ten times and
    passed through ``bytes_to_str``; every element of the result is
    converted back with ``str_to_bytes`` and base64-encoded to form a key.
    The pairs of one item are shuffled and added to output 0 of
    ``interface``.  ``state`` and ``label`` are not used.
    """
    # base64.encodestring was removed in Python 3.9; encodebytes replaces it
    # on Python 3, while Python 2 only provides encodestring.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    out = interface.output(0)
    for i in inp:
        pairs = shuffled((encode(str_to_bytes(c)), b'')
                         for c in bytes_to_str(str_to_bytes(i) * 10))
        for k, v in pairs:
            out.add(k, v)
Ejemplo n.º 13
0
 def map(string, params):
     """Return shuffled ``(key, b'')`` pairs for ``string`` repeated ten times.

     The repeated string is passed through ``bytes_to_str`` and iterated;
     each element is converted with ``str_to_bytes`` and base64-encoded to
     form the key.  ``params`` is not used.
     """
     # Prefer encodebytes: base64.encodestring no longer exists on
     # Python 3.9+, and encodebytes does not exist on Python 2.
     encode = getattr(base64, 'encodebytes', None) or base64.encodestring
     repeated = bytes_to_str(string * 10)
     return shuffled((encode(str_to_bytes(c)), b'') for c in repeated)
Ejemplo n.º 14
0
def Map(interface, state, label, inp):
    """Emit shuffled ``(key, b'')`` pairs for every item of ``inp``.

    Each item is converted with ``str_to_bytes``, repeated ten times and
    passed through ``bytes_to_str``; every element of the result is
    converted back with ``str_to_bytes`` and base64-encoded to form a key.
    The pairs of one item are shuffled and added to output 0 of
    ``interface``.  ``state`` and ``label`` are not used.
    """
    # Prefer encodebytes: base64.encodestring no longer exists on
    # Python 3.9+, and encodebytes does not exist on Python 2.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    out = interface.output(0)
    for i in inp:
        repeated = bytes_to_str(str_to_bytes(i) * 10)
        for k, v in shuffled((encode(str_to_bytes(c)), b'')
                             for c in repeated):
            out.add(k, v)
Ejemplo n.º 15
0
def open(url, task=None):
    """Open ``url`` as a ``SerialInput`` over its shuffled input list.

    When ``task`` is truthy, the string form of ``task.taskid`` is passed
    to ``inputlist`` as the partition; otherwise the partition is ``None``.
    Individual URLs are opened with ``schemes.open_chain`` for that task.
    """
    if task:
        partition = str(task.taskid)
    else:
        partition = None

    sources = shuffled(inputlist([url], partition=partition))
    return SerialInput(sources,
                       open=lambda url: schemes.open_chain(url, task=task))
Ejemplo n.º 16
0
def open(url, task=None):
    """Return a ``SerialInput`` reading the shuffled input list of ``url``.

    The input list is limited to the partition ``str(task.taskid)`` when a
    truthy ``task`` is given, and is unrestricted (``None``) otherwise.
    """
    partition = None
    if task:
        partition = str(task.taskid)

    def open_chain(url):
        # Open one URL of the input list on behalf of the same task.
        return schemes.open_chain(url, task=task)

    return SerialInput(shuffled(inputlist([url], partition=partition)),
                       open=open_chain)