def run(self, map, reduce, **jobargs): """Run a map-reduce job with either ``input_uri`` or ``output_uri`` as a "mongodb://..." URI. .. todo: parameter docs consider "input" and "output" (sans _uri) """ if not any(uri in jobargs for uri in ('input_uri', 'output_uri')): logging.info('You did not specify "input_uri" or "output_uri" ' 'with MongoJob. This may be in error.') if 'mongodb://' in jobargs.get('input_uri', ''): jobargs['map_input_stream'] = mongodb_input_stream if 'mongodb://' in jobargs.get('output_uri', ''): jobargs['reduce_output_stream'] = mongodb_output_stream jobargs['map'] = map jobargs['reduce'] = reduce jobargs.setdefault('input', calculate_splits(jobargs)) jobargs.setdefault('required_modules', []).extend([ 'mongodisco.mongodb_io', 'mongodisco.mongodb_input', 'mongodisco.mongodb_output', 'mongodisco.mongo_util', ]) super(MongoJob, self).run(**jobargs) if jobargs.get('print_to_stdout'): for key, value in classic_iterator(self.wait(show=True)): print key, value elif jobargs.get('job_wait', False): self.wait(show=True) return self
def run(self, map=None, reduce=None, **jobargs): """Run a map-reduce job with either ``input_uri`` or ``output_uri`` as a "mongodb://..." URI. .. todo: parameter docs consider "input" and "output" (sans _uri) """ if not any(uri in jobargs for uri in ('input_uri', 'output_uri', 'bson_input', 'bson_output')): logging.info('You did not specify "input_uri" or "output_uri" ' 'with MongoJob. This may be in error.') if 'mongodb://' in jobargs.get('input_uri', ''): jobargs['map_input_stream'] = mongodb_input_stream jobargs.setdefault('input', calculate_splits(jobargs)) elif jobargs.get('bson_input', False): jobargs['map_input_stream'] = bsonfile_input_stream if 'mongodb://' in jobargs.get('output_uri', ''): jobargs['reduce_output_stream'] = mongodb_output_stream output_params = { 'output_uri': jobargs['output_uri'], 'job_output_key': jobargs.get('job_output_key', '_id'), 'job_output_value': jobargs.get('job_output_value', 'value'), 'add_action': jobargs.get('add_action', 'insert'), 'add_upsert': jobargs.get('add_upsert', False), 'base_doc': jobargs.get('base_doc', {}) } params = jobargs.get('params', {}) if not isinstance(params, dict): raise Exception('params option must be a dict') params['mongodb'] = output_params jobargs['params'] = params elif jobargs.get('bson_output', False): jobargs['reduce_output_stream'] = bsonfile_output_stream if map: jobargs['map'] = map if reduce: jobargs['reduce'] = reduce jobargs.setdefault('required_modules', []).extend([ 'mongodisco.mongodb_io', 'mongodisco.mongodb_input', 'mongodisco.mongodb_output', 'mongodisco.mongo_util', 'mongodisco.bsonfile_io', 'mongodisco.bsonfile_input', 'mongodisco.bsonfile_output' ]) super(MongoJob, self).run(**jobargs) if jobargs.get('print_to_stdout'): for key, value in classic_iterator(self.wait(show=True)): print key, value elif jobargs.get('job_wait',False): self.wait(show=True) return self