コード例 #1
0
    def prepare_input_map(self, task, stage, params):
        """Group this task's inputs by label and optionally sort each group.

        Every input from ``self.get_inputs()`` is expanded through
        ``self.labelexpand`` into ``(label, input)`` pairs.  The pairs are
        collected under their label, or all under key ``0`` when
        ``stage.combine_labels`` is truthy.

        When ``stage.sort`` is truthy, each group is collapsed into a
        one-element list: a ``merge`` of the wrapped inputs if
        ``stage.input_sorted`` is truthy, otherwise a ``disk_sort`` over the
        shuffled, chained inputs.

        Returns a mapping from label to a list of inputs.
        """
        grouped = defaultdict(list)

        expanded = util.chainify(self.labelexpand(task, stage, source, params)
                                 for source in self.get_inputs())
        for label, item in expanded:
            key = 0 if stage.combine_labels else label
            grouped[key].append(item)

        if not stage.sort:
            return grouped

        if stage.input_sorted:
            # Inputs are already sorted individually, so merging suffices.
            def collapse(inputs):
                return merge(*(merge_wrapper(one,
                                             sort_range=stage.sort,
                                             desc=stage.desc)
                               for one in inputs))
        else:
            # NOTE(review): every label sorts through the same 'sort.dl'
            # path -- confirm disk_sort tolerates that with several labels.
            def collapse(inputs):
                return disk_sort(chainify(shuffled(inputs)),
                                 task.path('sort.dl'),
                                 sort_keys=stage.sort,
                                 sort_buffer_size='15%',
                                 binaries=stage.binaries,
                                 desc=stage.desc)

        sorted_map = {}
        for label, inputs in grouped.iteritems():
            sorted_map[label] = [collapse(inputs)]
        return sorted_map
コード例 #2
0
ファイル: pipeworker.py プロジェクト: dinedal/hustle
    def prepare_input_map(self, task, stage, params):
        """Build the label -> inputs mapping for a pipeline stage.

        The ``(label, input)`` pairs produced by ``self.labelexpand`` for each
        of ``self.get_inputs()`` are bucketed by label; with
        ``stage.combine_labels`` set, everything goes into bucket ``0``.

        If ``stage.sort`` is set, each bucket is replaced by a single-element
        list holding either a merged stream (``stage.input_sorted`` set) or a
        disk-sorted stream of the shuffled inputs.
        """
        buckets = defaultdict(list)

        pairs = util.chainify(self.labelexpand(task, stage, entry, params)
                              for entry in self.get_inputs())
        for lbl, entry in pairs:
            if stage.combine_labels:
                buckets[0].append(entry)
            else:
                buckets[lbl].append(entry)

        if not stage.sort:
            return buckets

        if stage.input_sorted:
            # Already-sorted inputs only need to be merged together.
            return dict(
                (lbl, [merge(*(merge_wrapper(entry,
                                             sort_range=stage.sort,
                                             desc=stage.desc)
                               for entry in entries))])
                for lbl, entries in buckets.iteritems())

        # Unsorted inputs: shuffle, chain into one stream, sort on disk.
        return dict(
            (lbl, [disk_sort(chainify(shuffled(entries)),
                             task.path('sort.dl'),
                             sort_keys=stage.sort,
                             sort_buffer_size='15%',
                             binaries=stage.binaries,
                             desc=stage.desc)])
            for lbl, entries in buckets.iteritems())
コード例 #3
0
ファイル: worker.py プロジェクト: Dieterbe/disco
 def reduce_input(self, task, params):
     """Return the sorted reduce input for ``task``.

     Collects the replica URLs of every input, restricts them to this
     task's partition when the inputs are partitioned and
     ``self['merge_partitions']`` is falsy, shuffles the resulting input
     list, wraps it in a ``SerialInput`` and passes it to ``self.sort``.
     """
     # master should feed only the partitioned inputs to reduce (and shuffle them?)
     from disco.worker import SerialInput
     from disco.util import inputlist, ispartitioned, shuffled

     replica_urls = [[url for _rid, url in inp.replicas]
                     for inp in self.get_inputs()]

     if ispartitioned(replica_urls) and not self['merge_partitions']:
         partition = str(task.taskid)
     else:
         partition = None

     ordered = shuffled(inputlist(replica_urls, partition=partition))
     stream = SerialInput(ordered,
                          task=task,
                          open=self.opener('reduce', 'in', params))
     return self.sort(stream, task)
コード例 #4
0
ファイル: worker.py プロジェクト: yongmou/disco
 def reduce_input(self, task, params):
     """Return the sorted reduce input for ``task``.

     Gathers the replica URLs of each input and, when the inputs are
     partitioned and ``self['merge_partitions']`` is falsy, filters them by
     ``task.group_label``.  The shuffled input list is wrapped in a
     ``SerialInput`` and handed to ``self.sort``.
     """
     # master should feed only the partitioned inputs to reduce (and shuffle them?)
     from disco.worker import SerialInput
     from disco.util import inputlist, ispartitioned, shuffled

     replica_urls = []
     for inp in self.get_inputs():
         replica_urls.append([url for _rid, url in inp.replicas])

     use_label = ispartitioned(replica_urls) and not self['merge_partitions']
     label = task.group_label if use_label else None

     serial = SerialInput(shuffled(inputlist(replica_urls, label=label)),
                          task=task,
                          open=self.opener('reduce', 'in', params))
     return self.sort(serial, task)
コード例 #5
0
ファイル: worker.py プロジェクト: AlexArgus/disco
 def reduce_input(self, task, params):
     """Return the sorted reduce input for ``task``.

     Gathers the replica URLs of each input; when the inputs are
     partitioned they are filtered by ``task.group_label``.  The shuffled
     input list is wrapped in a ``SerialInput`` and handed to ``self.sort``.
     """
     # master should feed only the partitioned inputs to reduce (and shuffle them?)
     from disco.worker import SerialInput
     from disco.util import inputlist, ispartitioned, shuffled

     replica_urls = [[url for _rid, url in inp.replicas]
                     for inp in self.get_inputs()]
     label = task.group_label if ispartitioned(replica_urls) else None

     shuffled_urls = shuffled(inputlist(replica_urls, label=label))
     reader = self.opener('reduce', 'in', params)
     return self.sort(SerialInput(shuffled_urls, task=task, open=reader),
                      task)
コード例 #6
0
ファイル: test_config.py プロジェクト: Dieterbe/disco
 def test_changes(self):
     """Check that a job still yields correct answers while nodes change.

     Skipped when fewer than two nodes are configured.  Otherwise a job is
     started on a shuffled mix of per-node ``url://`` inputs and plain
     integers, the cluster config is shrunk to two entries, then one, then
     restored, and finally the job's answers are checked.
     """
     if len(self.nodes) < 2:
         self.skipTest("Cannot test node changes with < 2 nodes")
     else:
         # items()/range()/list(range()) behave the same on Python 2 and
         # also work on Python 3, where iteritems/xrange do not exist and
         # a list cannot be concatenated with a range object.
         local = ['url://%s' % node
                  for node, max_workers in self.nodes.items()
                  for x in range(max_workers * 2)]
         input = shuffled(local + list(range(self.num_workers)))
         self.job = ConfigJob().run(input=self.test_server.urls(input))
         # Pause between config changes while the job is running.
         time.sleep(5)
         self.disco.config = self.config[:2]
         time.sleep(5)
         self.disco.config = self.config[:1]
         time.sleep(5)
         self.disco.config = self.config
         self.checkAnswers(self.job, input)
コード例 #7
0
ファイル: test_config.py プロジェクト: yuj/disco
 def test_changes(self):
     """Check that a job still yields correct answers while nodes change.

     Skipped when fewer than two nodes are configured.  Otherwise a job is
     started on a shuffled mix of per-node ``url://`` inputs and plain
     integers, the cluster config is shrunk to two entries, then one, then
     restored, and finally the job's answers are checked.
     """
     if len(self.nodes) < 2:
         self.skipTest("Cannot test node changes with < 2 nodes")
         return

     # Two url:// inputs per worker slot on every node.
     local = []
     for node, max_workers in self.nodes.items():
         for _slot in range(max_workers * 2):
             local.append('url://{0}'.format(node))

     input = shuffled(local + list(range(self.num_workers)))
     self.job = ConfigJob().run(input=self.test_server.urls(input))

     # Shrink the config step by step, pausing before each change.
     for size in (2, 1):
         time.sleep(5)
         self.disco.config = self.config[:size]
     time.sleep(5)
     self.disco.config = self.config
     self.checkAnswers(self.job, input)
コード例 #8
0
 def map(string, params):
     """Return shuffled ``(base64 of character, '')`` pairs.

     One pair is produced for each character of ``string`` repeated ten
     times; ``params`` is unused.
     """
     repeated = string * 10
     pairs = ((base64.encodestring(char), '') for char in repeated)
     return shuffled(pairs)
コード例 #9
0
ファイル: scheme_dir.py プロジェクト: AlexArgus/disco
def open(url, task=None):
    """Return a ``SerialInput`` over the shuffled input list for ``url``.

    When ``task`` is truthy the input list is filtered by
    ``task.group_label``; each entry is opened through
    ``schemes.open_chain`` with the same ``task``.
    """
    def open_entry(url):
        return schemes.open_chain(url, task=task)

    label = None
    if task:
        label = task.group_label
    entries = shuffled(inputlist([url], label=label))
    return SerialInput(entries, open=open_entry)
コード例 #10
0
ファイル: test_sort.py プロジェクト: AlexArgus/disco
 def map(string, params):
     """Return shuffled ``(base64 of character, b'')`` pairs.

     One pair is produced for each character of ``string`` repeated ten
     times (after ``bytes_to_str``); ``params`` is unused.
     """
     # base64.encodestring was removed in Python 3.9; encodebytes is the
     # equivalent replacement.  Fall back only where encodebytes is absent.
     encode = getattr(base64, 'encodebytes', None) or base64.encodestring
     return shuffled((encode(str_to_bytes(c)), b'') for c in bytes_to_str(string * 10))
コード例 #11
0
def open(url, task=None):
    """Wrap ``url`` in a ``SerialInput`` whose entries open via ``schemes``.

    The input list built from ``url`` is filtered by ``task.group_label``
    when ``task`` is truthy (no label otherwise) and shuffled before being
    handed to ``SerialInput``.
    """
    if task:
        label = task.group_label
    else:
        label = None

    def chain_opener(url):
        # Forward the enclosing task to the scheme-specific opener.
        return schemes.open_chain(url, task=task)

    return SerialInput(shuffled(inputlist([url], label=label)),
                       open=chain_opener)
コード例 #12
0
ファイル: test_pipeline_sort.py プロジェクト: AlexArgus/disco
def Map(interface, state, label, inp):
    """Add shuffled ``(base64 of character, b'')`` pairs to output 0.

    For each item of ``inp``, the item is converted to bytes, repeated ten
    times and converted back to a string; every character of that string
    yields one pair.  ``state`` and ``label`` are unused.
    """
    # base64.encodestring was removed in Python 3.9; encodebytes is the
    # equivalent replacement.  Fall back only where encodebytes is absent.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    out = interface.output(0)
    for i in inp:
        pairs = ((encode(str_to_bytes(c)), b'')
                 for c in bytes_to_str(str_to_bytes(i) * 10))
        for k, v in shuffled(pairs):
            out.add(k, v)
コード例 #13
0
 def map(string, params):
     """Return shuffled ``(base64 of character, b'')`` pairs.

     One pair is produced for each character of ``string`` repeated ten
     times (after ``bytes_to_str``); ``params`` is unused.
     """
     # base64.encodestring no longer exists on Python 3.9+, so use
     # encodebytes when available and the old name only as a fallback.
     encode = getattr(base64, 'encodebytes', None) or base64.encodestring
     return shuffled((encode(str_to_bytes(c)), b'')
                     for c in bytes_to_str(string * 10))
コード例 #14
0
def Map(interface, state, label, inp):
    """Add shuffled ``(base64 of character, b'')`` pairs to output 0.

    For each item of ``inp``, the item is converted to bytes, repeated ten
    times and converted back to a string; every character of that string
    yields one pair.  ``state`` and ``label`` are unused.
    """
    # base64.encodestring no longer exists on Python 3.9+, so use
    # encodebytes when available and the old name only as a fallback.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    out = interface.output(0)
    for i in inp:
        for k, v in shuffled((encode(str_to_bytes(c)), b'')
                             for c in bytes_to_str(str_to_bytes(i) * 10)):
            out.add(k, v)
コード例 #15
0
ファイル: scheme_dir.py プロジェクト: Dieterbe/disco
def open(url, task=None):
    """Return a ``SerialInput`` over the shuffled input list for ``url``.

    When ``task`` is truthy the input list is restricted to the partition
    named by ``str(task.taskid)``; each entry is opened through
    ``schemes.open_chain`` with the same ``task``.
    """
    def open_entry(url):
        return schemes.open_chain(url, task=task)

    partition = None
    if task:
        partition = str(task.taskid)
    entries = shuffled(inputlist([url], partition=partition))
    return SerialInput(entries, open=open_entry)
コード例 #16
0
def open(url, task=None):
    """Wrap ``url`` in a ``SerialInput`` whose entries open via ``schemes``.

    The input list built from ``url`` is restricted to partition
    ``str(task.taskid)`` when ``task`` is truthy (no partition otherwise)
    and shuffled before being handed to ``SerialInput``.
    """
    if task:
        partition = str(task.taskid)
    else:
        partition = None

    def chain_opener(url):
        # Forward the enclosing task to the scheme-specific opener.
        return schemes.open_chain(url, task=task)

    return SerialInput(shuffled(inputlist([url], partition=partition)),
                       open=chain_opener)