Example #1
0
    def run(self, map, reduce, **jobargs):
        """Run a map-reduce job with either ``input_uri`` or ``output_uri``
        as a "mongodb://..." URI.

        :param map: stored in the job arguments under ``'map'``.
        :param reduce: stored in the job arguments under ``'reduce'``.
        :param jobargs: remaining job arguments; after the adjustments below
            they are forwarded to the parent class's ``run``. Keys read here:

            * ``input_uri`` -- if it contains ``"mongodb://"``,
              ``map_input_stream`` is set to ``mongodb_input_stream``.
            * ``output_uri`` -- if it contains ``"mongodb://"``,
              ``reduce_output_stream`` is set to ``mongodb_output_stream``.
            * ``input`` -- used as given; when absent it is computed with
              ``calculate_splits(jobargs)``.
            * ``required_modules`` -- the mongodisco modules are appended to
              a copy of this sequence.
            * ``print_to_stdout`` -- if truthy, wait for the job and print
              every key/value pair of the result.
            * ``job_wait`` -- if truthy (and ``print_to_stdout`` is not),
              wait for the job without printing the results.
        :returns: ``self``.

        .. todo::

            consider "input" and "output" (sans _uri)
        """

        if 'input_uri' not in jobargs and 'output_uri' not in jobargs:
            logging.info('You did not specify "input_uri" or "output_uri" '
                         'with MongoJob. This may be in error.')

        if 'mongodb://' in jobargs.get('input_uri', ''):
            jobargs['map_input_stream'] = mongodb_input_stream

        if 'mongodb://' in jobargs.get('output_uri', ''):
            jobargs['reduce_output_stream'] = mongodb_output_stream

        jobargs['map'] = map
        jobargs['reduce'] = reduce

        # Only compute the splits when the caller did not supply an input;
        # dict.setdefault() would evaluate calculate_splits() unconditionally.
        if 'input' not in jobargs:
            jobargs['input'] = calculate_splits(jobargs)

        # Build a new list instead of extending in place: ``jobargs`` is a
        # fresh dict, but the list inside it still belongs to the caller.
        jobargs['required_modules'] = list(
            jobargs.get('required_modules') or []) + [
            'mongodisco.mongodb_io',
            'mongodisco.mongodb_input',
            'mongodisco.mongodb_output',
            'mongodisco.mongo_util',
        ]

        super(MongoJob, self).run(**jobargs)

        if jobargs.get('print_to_stdout'):
            for key, value in classic_iterator(self.wait(show=True)):
                # Single pre-formatted string: same output as the Python 2
                # ``print key, value`` statement, and valid Python 3 syntax.
                print('%s %s' % (key, value))

        elif jobargs.get('job_wait', False):
            self.wait(show=True)

        return self
Example #2
0
    def run(self, map=None, reduce=None, **jobargs):
        """Run a map-reduce job with either ``input_uri`` or ``output_uri``
        as a "mongodb://..." URI, or with BSON file input/output.

        :param map: if not ``None``, stored in the job arguments under
            ``'map'``.
        :param reduce: if not ``None``, stored in the job arguments under
            ``'reduce'``.
        :param jobargs: remaining job arguments; after the adjustments below
            they are forwarded to the parent class's ``run``. Keys read here:

            * ``input_uri`` -- if it contains ``"mongodb://"``,
              ``map_input_stream`` is set to ``mongodb_input_stream`` and
              ``input`` (when absent) is computed with
              ``calculate_splits(jobargs)``.
            * ``bson_input`` -- checked only when ``input_uri`` is not a
              mongodb URI; if truthy, ``map_input_stream`` is set to
              ``bsonfile_input_stream``.
            * ``output_uri`` -- if it contains ``"mongodb://"``,
              ``reduce_output_stream`` is set to ``mongodb_output_stream``
              and the output options are stored under ``params['mongodb']``.
            * ``job_output_key`` (default ``'_id'``), ``job_output_value``
              (default ``'value'``), ``add_action`` (default ``'insert'``),
              ``add_upsert`` (default ``False``), ``base_doc`` (default
              ``{}``) -- copied into ``params['mongodb']`` together with
              ``output_uri``.
            * ``params`` -- must be a dict when a mongodb ``output_uri`` is
              used; a shallow copy of it receives the ``'mongodb'`` entry.
            * ``bson_output`` -- checked only when ``output_uri`` is not a
              mongodb URI; if truthy, ``reduce_output_stream`` is set to
              ``bsonfile_output_stream``.
            * ``required_modules`` -- the mongodisco modules are appended to
              a copy of this sequence.
            * ``print_to_stdout`` -- if truthy, wait for the job and print
              every key/value pair of the result.
            * ``job_wait`` -- if truthy (and ``print_to_stdout`` is not),
              wait for the job without printing the results.
        :returns: ``self``.
        :raises TypeError: if a mongodb ``output_uri`` is given and
            ``params`` is not a dict.

        .. todo::

            consider "input" and "output" (sans _uri)
        """

        source_keys = ('input_uri', 'output_uri', 'bson_input', 'bson_output')
        if not any(key in jobargs for key in source_keys):
            logging.info('You did not specify "input_uri" or "output_uri" '
                         'with MongoJob. This may be in error.')

        if 'mongodb://' in jobargs.get('input_uri', ''):
            jobargs['map_input_stream'] = mongodb_input_stream
            # Only compute the splits when the caller did not supply an
            # input; dict.setdefault() would always call calculate_splits().
            if 'input' not in jobargs:
                jobargs['input'] = calculate_splits(jobargs)
        elif jobargs.get('bson_input', False):
            jobargs['map_input_stream'] = bsonfile_input_stream

        if 'mongodb://' in jobargs.get('output_uri', ''):
            jobargs['reduce_output_stream'] = mongodb_output_stream

            params = jobargs.get('params', {})
            if not isinstance(params, dict):
                # TypeError is still caught by ``except Exception`` handlers.
                raise TypeError('params option must be a dict')

            # Work on a shallow copy so the caller's dict is left untouched.
            params = params.copy()
            params['mongodb'] = {
                'output_uri': jobargs['output_uri'],
                'job_output_key': jobargs.get('job_output_key', '_id'),
                'job_output_value': jobargs.get('job_output_value', 'value'),
                'add_action': jobargs.get('add_action', 'insert'),
                'add_upsert': jobargs.get('add_upsert', False),
                'base_doc': jobargs.get('base_doc', {}),
            }
            jobargs['params'] = params

        elif jobargs.get('bson_output', False):
            jobargs['reduce_output_stream'] = bsonfile_output_stream

        # Explicit None checks: "not supplied" is what the defaults encode.
        if map is not None:
            jobargs['map'] = map
        if reduce is not None:
            jobargs['reduce'] = reduce

        # Build a new list instead of extending in place: ``jobargs`` is a
        # fresh dict, but the list inside it still belongs to the caller.
        jobargs['required_modules'] = list(
            jobargs.get('required_modules') or []) + [
            'mongodisco.mongodb_io',
            'mongodisco.mongodb_input',
            'mongodisco.mongodb_output',
            'mongodisco.mongo_util',
            'mongodisco.bsonfile_io',
            'mongodisco.bsonfile_input',
            'mongodisco.bsonfile_output',
        ]

        super(MongoJob, self).run(**jobargs)

        if jobargs.get('print_to_stdout'):
            for key, value in classic_iterator(self.wait(show=True)):
                # Single pre-formatted string: same output as the Python 2
                # ``print key, value`` statement, and valid Python 3 syntax.
                print('%s %s' % (key, value))

        elif jobargs.get('job_wait', False):
            self.wait(show=True)

        return self