Example #1
def _do_map(input_reader, processor_func, finalize_func, params, _shards,
            _output_writer, _output_writer_kwargs, _job_name, _queue_name,
            *processor_args, **processor_kwargs):

    handler_spec = qualname(unpacker)
    handler_params = {
        "func": qualname(processor_func)
        if callable(processor_func) else processor_func,
        "args": processor_args,
        "kwargs": processor_kwargs
    }

    handler_params.update(params)

    pipelines = []
    pipelines.append(MapperPipeline(
        _job_name,
        handler_spec=handler_spec,
        input_reader_spec=qualname(input_reader),
        output_writer_spec=qualname(_output_writer) if _output_writer else None,
        params=handler_params,
        shards=_shards
    ))

    if finalize_func:
        pipelines.append(
            CallbackPipeline(
                qualname(finalize_func) if callable(finalize_func) else finalize_func,
                *processor_args,
                **processor_kwargs
            )
        )

    new_pipeline = DynamicPipeline(pipelines)
    new_pipeline.start(queue_name=_queue_name or 'default')
    return new_pipeline
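
All of the _do_map variants point handler_spec at a module-level unpacker function that these snippets do not show. Conceptually it reads the "func", "args" and "kwargs" entries back out of the mapper params and applies the resolved callable to each input item. Below is a minimal sketch of such a handler, assuming the mapreduce library's context.get() and util.for_name helpers (the rough inverse of qualname); it is an illustration, not the project's actual implementation:

from mapreduce import context
from mapreduce.util import for_name

def unpacker(entity):
    # Hypothetical sketch: pull the dispatch info that _do_map stored in the
    # mapper params, resolve the dotted name back to a callable, and call it.
    params = context.get().mapreduce_spec.mapper.params
    func = for_name(params["func"])
    return func(entity, *params["args"], **params["kwargs"])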
Example #2
def _do_map(
    input_reader, processor_func, finalize_func, params,
    _shards, _output_writer, _output_writer_kwargs, _job_name, _queue_name,
    *processor_args, **processor_kwargs):

    start_pipeline = processor_kwargs.pop('start_pipeline', True)

    handler_spec = qualname(unpacker)
    handler_params = {
        "func": qualname(processor_func) if callable(processor_func) else processor_func,
        "args": processor_args,
        "kwargs": processor_kwargs,
        "_finalized": qualname(finalize_func) if callable(finalize_func) else finalize_func
    }

    handler_params.update(params)

    new_pipeline = MapperPipeline(
        _job_name,
        handler_spec=handler_spec,
        input_reader_spec=qualname(input_reader),
        output_writer_spec=qualname(_output_writer) if _output_writer else None,
        params=handler_params,
        shards=_shards
    )

    if start_pipeline:
        new_pipeline.start(queue_name=_queue_name or 'default')

    return new_pipeline
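
Unlike Example #1, this variant does not chain a CallbackPipeline inside a DynamicPipeline; the finalize callable is stored in the handler params under "_finalized" instead, and a start_pipeline flag popped from the processor kwargs lets the caller build the pipeline without starting it. A hedged usage sketch, in which every name except _do_map is hypothetical:

pipeline = _do_map(
    MyInputReader,        # hypothetical input reader class
    process_item,         # hypothetical per-item processor
    None,                 # no finalize step
    {},                   # no extra handler params
    None,                 # _shards: let the library pick a default
    None, None,           # no output writer, no writer kwargs
    "Example job",        # _job_name
    None,                 # _queue_name falls back to 'default'
    start_pipeline=False  # defer starting the pipeline
)
pipeline.start(queue_name='default')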
Example #3
def map_reduce_queryset(queryset, map_func, reduce_func, output_writer, *args, **kwargs):

    """
        Does a complete map-shuffle-reduce over the queryset

        output_writer should be a mapreduce OutputWriter subclass

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs)

    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name", "Map reduce task over {}".format(queryset.model))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(DjangoInputReader),
        output_writer,
        mapper_params={
            "input_reader": DjangoInputReader.params_from_queryset(queryset),
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards
    )
    pipeline.start(queue_name=_queue_name)
    return pipeline
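
A hedged usage sketch for map_reduce_queryset; the model, map/reduce functions, and writer class are all hypothetical stand-ins (any mapreduce OutputWriter subclass will do for the writer):

def count_words(instance):
    # map: emit (word, 1) for each word in a hypothetical text field
    for word in instance.body.split():
        yield word, 1

def sum_counts(key, values):
    # reduce: total the counts emitted for each word
    yield "%s: %d\n" % (key, sum(int(v) for v in values))

pipeline = map_reduce_queryset(
    Article.objects.all(),  # hypothetical Django queryset
    count_words,
    sum_counts,
    MyOutputWriter,         # a mapreduce OutputWriter subclass
    _shards=4,
    _queue_name='mapreduce'
)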
Example #4
def map_reduce_queryset(queryset, map_func, reduce_func, output_writer, *args,
                        **kwargs):
    """
        Does a complete map-shuffle-reduce over the queryset

        output_writer should be a mapreduce OutputWriter subclass

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs)

    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name",
                            "Map reduce task over {}".format(queryset.model))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(DjangoInputReader),
        output_writer,
        mapper_params={
            "input_reader": DjangoInputReader.params_from_queryset(queryset),
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards)
    pipeline.start(queue_name=_queue_name)
    return pipeline
Example #5
def _do_map(input_reader, processor_func, finalize_func, params, _shards,
            _output_writer, _output_writer_kwargs, _job_name, _queue_name,
            *processor_args, **processor_kwargs):

    start_pipeline = processor_kwargs.pop('start_pipeline', True)

    handler_spec = qualname(unpacker)
    handler_params = {
        "func": qualname(processor_func) if callable(processor_func) else processor_func,
        "args": processor_args,
        "kwargs": processor_kwargs,
        "_finalized": qualname(finalize_func) if callable(finalize_func) else finalize_func
    }

    handler_params.update(params)

    new_pipeline = MapperPipeline(
        _job_name,
        handler_spec=handler_spec,
        input_reader_spec=qualname(input_reader),
        output_writer_spec=qualname(_output_writer) if _output_writer else None,
        params=handler_params,
        shards=_shards
    )

    if start_pipeline:
        new_pipeline.start(queue_name=_queue_name or 'default')

    return new_pipeline
Example #6
def map_reduce_entities(kind_name, namespace, map_func, reduce_func,
                        output_writer, *args, **kwargs):
    """
        Does a complete map-shuffle-reduce over the entities

        output_writer should be a mapreduce OutputWriter subclass
        _filters is an optional kwarg which will be passed directly to the input reader

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs, additional={"_filters"})

    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name",
                            "Map reduce task over {}".format(kind_name))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(RawDatastoreInputReader),
        output_writer,
        mapper_params={
            'input_reader': {
                RawDatastoreInputReader.ENTITY_KIND_PARAM: kind_name,
                RawDatastoreInputReader.NAMESPACE_PARAM: namespace,
                RawDatastoreInputReader.FILTERS_PARAM: options.pop("_filters", [])
            },
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards)
    pipeline.start(queue_name=_queue_name)
    return pipeline
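
The extra _filters option is forwarded verbatim to RawDatastoreInputReader via FILTERS_PARAM. In the App Engine mapreduce library, datastore filters are usually given as (property, operator, value) tuples; treat that exact format as an assumption here. A sketch with hypothetical names throughout:

pipeline = map_reduce_entities(
    "Article",                            # hypothetical datastore kind
    "",                                   # default namespace
    count_entity,                         # hypothetical map function
    sum_counts,                           # hypothetical reduce function
    MyOutputWriter,                       # a mapreduce OutputWriter subclass
    _filters=[("published", "=", True)],  # assumed (property, op, value) format
    _queue_name='mapreduce'
)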
Example #7
def map_reduce_entities(kind_name, namespace, map_func, reduce_func, output_writer, *args, **kwargs):
    """
        Does a complete map-shuffle-reduce over the entities

        output_writer should be a mapreduce OutputWriter subclass
        _filters is an optional kwarg which will be passed directly to the input reader

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs, additional={"_filters"})

    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name", "Map reduce task over {}".format(kind_name))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(RawDatastoreInputReader),
        output_writer,
        mapper_params={
            'input_reader': {
                RawDatastoreInputReader.ENTITY_KIND_PARAM: kind_name,
                RawDatastoreInputReader.NAMESPACE_PARAM: namespace,
                RawDatastoreInputReader.FILTERS_PARAM: options.pop("_filters", [])
            },
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards
    )
    pipeline.start(queue_name=_queue_name)
    return pipeline
Example #8
def map_reduce_entities(kind_name, map_func, reduce_func, output_writer, *args,
                        **kwargs):
    """
        Does a complete map-shuffle-reduce over the entities

        output_writer should be a mapreduce OutputWriter subclass

        Returns the pipeline
    """
    map_func = qualname(map_func)
    reduce_func = qualname(reduce_func)
    output_writer = qualname(output_writer)

    options = extract_options(kwargs)

    _shards = options.pop("_shards", None)
    _job_name = options.pop("_job_name",
                            "Map reduce task over {}".format(kind_name))
    _queue_name = options.pop("_queue_name", 'default')

    pipeline = MapreducePipeline(
        _job_name,
        map_func,
        reduce_func,
        qualname(RawDatastoreInputReader),
        output_writer,
        mapper_params={
            'input_reader': {
                RawDatastoreInputReader.ENTITY_KIND_PARAM: kind_name
            },
        },
        reducer_params={
            'output_writer': options.pop("_output_writer_kwargs", {}) or {}
        },
        shards=_shards)
    pipeline.start(queue_name=_queue_name)
    return pipeline
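
This variant takes no namespace argument and does not accept _filters; only the entity kind is handed to RawDatastoreInputReader. A correspondingly simpler call, with every name except map_reduce_entities hypothetical:

pipeline = map_reduce_entities(
    "Article",       # hypothetical datastore kind; no namespace parameter here
    count_entity,    # hypothetical map function
    sum_counts,      # hypothetical reduce function
    MyOutputWriter   # a mapreduce OutputWriter subclass
)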
Example #9
def _do_map(
    input_reader, processor_func, finalize_func, params,
    _shards, _output_writer, _output_writer_kwargs, _job_name, _queue_name,
    *processor_args, **processor_kwargs):

    handler_spec = qualname(unpacker)
    handler_params = {
        "func": qualname(processor_func) if callable(processor_func) else processor_func,
        "args": processor_args,
        "kwargs": processor_kwargs
    }

    handler_params.update(params)

    pipelines = []
    pipelines.append(MapperPipeline(
        _job_name,
        handler_spec=handler_spec,
        input_reader_spec=qualname(input_reader),
        output_writer_spec=qualname(_output_writer) if _output_writer else None,
        params=handler_params,
        shards=_shards
    ))

    if finalize_func:
        pipelines.append(
            CallbackPipeline(
                qualname(finalize_func) if callable(finalize_func) else finalize_func,
                *processor_args,
                **processor_kwargs
            )
        )

    new_pipeline = DynamicPipeline(pipelines)
    new_pipeline.start(queue_name=_queue_name or 'default')
    return new_pipeline