def _do_map(input_reader, processor_func, finalize_func, params, _shards, _output_writer,
            _output_writer_kwargs, _job_name, _queue_name, *processor_args, **processor_kwargs):
    # handler_params carries the user function (by qualified name) plus its
    # args/kwargs, for the `unpacker` handler to resolve and call on each shard.
    handler_spec = qualname(unpacker)
    handler_params = {
        "func": qualname(processor_func) if callable(processor_func) else processor_func,
        "args": processor_args,
        "kwargs": processor_kwargs
    }
    handler_params.update(params)

    # NOTE: _output_writer_kwargs is accepted but not used by this variant.
    pipelines = []
    pipelines.append(
        MapperPipeline(
            _job_name,
            handler_spec=handler_spec,
            input_reader_spec=qualname(input_reader),
            output_writer_spec=qualname(_output_writer) if _output_writer else None,
            params=handler_params,
            shards=_shards
        )
    )

    # Optionally chain a one-off callback to run after the mapper stage.
    if finalize_func:
        pipelines.append(
            CallbackPipeline(
                qualname(finalize_func) if callable(finalize_func) else finalize_func,
                *processor_args,
                **processor_kwargs
            )
        )

    new_pipeline = DynamicPipeline(pipelines)
    new_pipeline.start(queue_name=_queue_name or 'default')
    return new_pipeline

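# Usage sketch for the DynamicPipeline variant of _do_map above. The
# processor/finalizer functions and ExampleInputReader are hypothetical
# stand-ins (as is the 'entity_kind' reader param); only _do_map's own
# signature comes from this module, so treat this as an illustration
# rather than a tested recipe.

def _example_touch(entity):
    # Runs once per item yielded by the input reader on each shard.
    entity['touched'] = True


def _example_announce_done():
    # Runs once, via CallbackPipeline, after the mapper stage completes.
    pass


def _example_touch_all(ExampleInputReader):
    # _queue_name=None falls back to the 'default' queue inside _do_map.
    return _do_map(
        ExampleInputReader, _example_touch, _example_announce_done,
        params={'input_reader': {'entity_kind': 'Profile'}},
        _shards=4, _output_writer=None, _output_writer_kwargs=None,
        _job_name="Touch all Profile entities", _queue_name=None,
    )
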
def _do_map(
        input_reader, processor_func, finalize_func, params,
        _shards, _output_writer, _output_writer_kwargs, _job_name, _queue_name,
        *processor_args, **processor_kwargs):
    # Our own flag: pop it so it isn't forwarded to the user function.
    start_pipeline = processor_kwargs.pop('start_pipeline', True)

    handler_spec = qualname(unpacker)
    handler_params = {
        "func": qualname(processor_func) if callable(processor_func) else processor_func,
        "args": processor_args,
        "kwargs": processor_kwargs,
        # Unlike the DynamicPipeline variant above, the finalize function is
        # passed through handler_params rather than run as a CallbackPipeline.
        "_finalized": qualname(finalize_func) if callable(finalize_func) else finalize_func
    }
    handler_params.update(params)

    new_pipeline = MapperPipeline(
        _job_name,
        handler_spec=handler_spec,
        input_reader_spec=qualname(input_reader),
        output_writer_spec=qualname(_output_writer) if _output_writer else None,
        params=handler_params,
        shards=_shards
    )

    if start_pipeline:
        new_pipeline.start(queue_name=_queue_name or 'default')
    return new_pipeline

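# Usage sketch for the start_pipeline-aware variant above: passing
# start_pipeline=False (popped from processor_kwargs) returns the
# MapperPipeline unstarted, so the caller can start it later or embed it
# in a larger pipeline. ExampleInputReader, touch_func and done_func are
# hypothetical placeholders.

def _example_deferred_map(ExampleInputReader, touch_func, done_func):
    pipeline = _do_map(
        ExampleInputReader, touch_func, done_func,
        params={'input_reader': {'entity_kind': 'Profile'}},
        _shards=2, _output_writer=None, _output_writer_kwargs=None,
        _job_name="Deferred map over Profile", _queue_name='mapper-queue',
        start_pipeline=False,  # build only; nothing is enqueued yet
    )
    # The caller decides when (and on which queue) to kick it off.
    pipeline.start(queue_name='mapper-queue')
    return pipeline
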
def map_reduce_queryset(queryset, map_func, reduce_func, output_writer, *args, **kwargs):
    """
        Does a complete map-shuffle-reduce over the queryset

        output_writer should be a mapreduce OutputWriter subclass

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs)
    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name", "Map reduce task over {}".format(queryset.model))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(DjangoInputReader),
        output_writer,
        mapper_params={
            "input_reader": DjangoInputReader.params_from_queryset(queryset),
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards
    )
    pipeline.start(queue_name=_queue_name)
    return pipeline

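# Usage sketch for map_reduce_queryset: a count-by-key job over a Django
# queryset. The Profile model and FileOutputWriter are illustrative
# assumptions; the map phase yields (key, value) pairs into the shuffle
# stage and the reduce phase yields output lines for the writer.

def _example_count_by_city(profile):
    yield (profile.city, 1)


def _example_sum_city_counts(city, counts):
    # The shuffle stage delivers every value emitted for this key.
    yield "{}: {}\n".format(city, sum(int(c) for c in counts))

# pipeline = map_reduce_queryset(
#     Profile.objects.all(),
#     _example_count_by_city,
#     _example_sum_city_counts,
#     FileOutputWriter,  # any mapreduce OutputWriter subclass
#     _shards=8,
#     _queue_name='mapreduce-queue',
# )
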
def map_reduce_entities(kind_name, namespace, map_func, reduce_func, output_writer, *args, **kwargs):
    """
        Does a complete map-shuffle-reduce over the entities

        output_writer should be a mapreduce OutputWriter subclass

        _filters is an optional kwarg which will be passed directly to the input reader

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs, additional={"_filters"})
    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name", "Map reduce task over {}".format(kind_name))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(RawDatastoreInputReader),
        output_writer,
        mapper_params={
            'input_reader': {
                RawDatastoreInputReader.ENTITY_KIND_PARAM: kind_name,
                RawDatastoreInputReader.NAMESPACE_PARAM: namespace,
                RawDatastoreInputReader.FILTERS_PARAM: options.pop("_filters", [])
            },
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards
    )
    pipeline.start(queue_name=_queue_name)
    return pipeline

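# Usage sketch for the namespace-aware map_reduce_entities above. The kind,
# namespace, writer and the (property, operator, value) filter triple are
# illustrative; _filters is handed straight to RawDatastoreInputReader via
# FILTERS_PARAM.

def _example_map_entity(entity):
    # RawDatastoreInputReader yields dict-like datastore entities.
    yield (entity.get('city', 'unknown'), 1)

# pipeline = map_reduce_entities(
#     "Profile", "customer-a",
#     _example_map_entity,
#     _example_sum_city_counts,  # reducer sketched above
#     FileOutputWriter,
#     _filters=[("active", "=", True)],
#     _shards=8,
# )
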
def map_reduce_entities(kind_name, map_func, reduce_func, output_writer, *args, **kwargs):
    """
        Does a complete map-shuffle-reduce over the entities

        output_writer should be a mapreduce OutputWriter subclass

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs)
    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name", "Map reduce task over {}".format(kind_name))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(RawDatastoreInputReader),
        output_writer,
        mapper_params={
            'input_reader': {
                RawDatastoreInputReader.ENTITY_KIND_PARAM: kind_name
            },
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards
    )
    pipeline.start(queue_name=_queue_name)
    return pipeline

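# The variant above takes no namespace or filters and reads every entity of
# the kind from the default namespace; the call shape is otherwise the same
# (names below remain illustrative).

# pipeline = map_reduce_entities(
#     "Profile",
#     _example_map_entity,
#     _example_sum_city_counts,
#     FileOutputWriter,
#     _job_name="Count all profiles by city",
# )
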