Example #1
0
    def __init__(self, **kwargs):
        """Construct the job with the switches this class always passes.

        The fixed switches ('--input-protocol repr' plus a '--python-archive'
        whose value is whatever Package.create() returns) are placed ahead
        of any caller-supplied 'args' before the base constructor runs.
        Afterwards self.network is set to the attribute of Network.Network
        named by self.options.network.
        """
        required_switches = ['--input-protocol', 'repr',
                             '--python-archive', Package.create()]
        kwargs['args'] = required_switches + kwargs.get('args', [])
        super(Propagate, self).__init__(**kwargs)

        opts = self.options
        self.network = getattr(Network.Network, opts.network)

        # One-time setup guarded by the class-level flag: nothing to do when
        # no argument is truthy or when an earlier instance already ran it.
        if not any(self.args) or Propagate._initialized:
            return
        Propagate._initialized = True
        opts.python_archives.append(Package.create())
Example #2
0
    def __init__(self, **kwargs):
        """Initialise the job, forcing the switches it depends on.

        '--input-protocol repr' and '--python-archive <Package.create()>' are
        prepended to the 'args' keyword (an empty list when the caller gave
        none) and the result is handed to the base constructor. The network
        is then resolved by looking up self.options.network as an attribute
        of Network.Network.
        """
        caller_args = None  # filled in below, after the archive is created
        fixed_args = ['--input-protocol', 'repr',
                      '--python-archive', Package.create()]
        caller_args = kwargs.get('args', [])
        kwargs['args'] = fixed_args + caller_args

        super(Propagate, self).__init__(**kwargs)
        self.network = getattr(Network.Network, self.options.network)

        # The archive is registered at most once: Propagate._initialized is
        # a class-level flag flipped by the first instance that has at
        # least one truthy argument.
        needs_setup = any(self.args) and not Propagate._initialized
        if needs_setup:
            Propagate._initialized = True
            archives = self.options.python_archives
            archives.append(Package.create())
Example #3
0
    def __init__(self, **kwargs):
        """Construct the job with its fixed runner and partitioner switches.

        A fixed list of switches (repr input protocol, the 'emr' runner,
        Hadoop version '0.20', the KeyFieldBasedPartitioner and two jobconf
        entries) is placed ahead of any caller-supplied 'args' before the
        base constructor runs. Afterwards the network is resolved from
        self.options.network, the reduce-task count is set from the
        'partitions' option, and a one-time setup step registers the
        partition archive and the Python package archive.
        """
        # Note that EMR is required for Schimmy propagation
        schimmy_switches = [
            '--input-protocol', 'repr',
            '-r', 'emr',
            '--hadoop-version', '0.20',
            '--hadoop-arg', '-partitioner',
            '--hadoop-arg',
            'org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner',
            '--jobconf', 'mapred.text.key.partitioner.options=-k1,1',
            '--jobconf', 'map.output.key.field.separator=,',
        ]
        kwargs['args'] = schimmy_switches + kwargs.get('args', [])
        super(Propagate, self).__init__(**kwargs)

        opts = self.options
        self.network = getattr(Network.Network, opts.network)

        # In Schimmy, the #partitions is always equal to #reducers
        opts.jobconf['mapred.reduce.tasks'] = opts.partitions

        # Initialize exactly once by creating our initial partition files
        # and packaging the relevant Python scripts. The class-level flag
        # makes later instances (and instances with no truthy args) skip it.
        if not any(self.args) or Propagate._initialized:
            return
        Propagate._initialized = True

        # Bind the append method before creating the partitions so the
        # attribute lookup still happens ahead of the create() call.
        register_upload = opts.upload_archives.append
        partition_archive = Partitions.create(self.args[0],
                                              opts.partitions,
                                              self.partition,
                                              True)
        register_upload('%s#partitions' % partition_archive)
        opts.python_archives.append(Package.create())