Example #1
import struct

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class FilterMapper(Mapper):
    def map(self, context):
        # Each record is a (dst, packed count) pair; unpack the 4-byte
        # big-endian count, then funnel everything to a single reducer
        # under the constant key 0 so it can rank destinations globally.
        dst, cnt = context.key, context.value
        cnt = struct.unpack(">i", cnt)[0]
        context.emit(0, (dst, cnt))


class FilterReducer(Reducer):
    def reduce(self, context):
        # Collect the (dst, cnt) pairs, then rank destinations by count.
        dic = {}
        for dst, cnt in context.values:
            dic[dst] = cnt
        lst = sorted(dic.items(), key=lambda t: t[1], reverse=True)

        # Slicing emits at most 50 records and avoids an IndexError when
        # fewer than 50 destinations exist.
        for dst, cnt in lst[:50]:
            context.emit(dst, cnt)


if __name__ == "__main__":
    factory = Factory(FilterMapper, FilterReducer)
    run_task(factory)
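
Emitting everything under the constant key 0 routes all (dst, cnt) pairs to one reducer, which is what makes a global top 50 possible. A minimal pure-Python sketch of that ranking step, with made-up sample pairs:

# Standalone sketch of the reducer's top-N logic; the pairs are hypothetical.
pairs = [("4", 2), ("7", 9), ("1", 5)]

dic = dict(pairs)
top = sorted(dic.items(), key=lambda t: t[1], reverse=True)[:50]
for dst, cnt in top:
    print(dst, cnt)  # 7 9 / 1 5 / 4 2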
Example #2
"""
Count the followers of each node.
Input: a directed graph, one edge per line.
    e.g. "3   4" means that person 3 follows person 4.
Output: (destination, follower count) pairs.
    e.g. "4 2" means that node 4 has 2 followers.
"""

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class DstCountMapper(Mapper):
    def map(self, context):
        # Each input line is an edge "src dst"; emit the destination
        # with a count of one.
        x = context.value.split()
        context.emit(x[1], "1")


class DstCountReducer(Reducer):
    def reduce(self, context):
        # With auto_serialize=False, keys and values travel as strings.
        count = sum(int(v) for v in context.values)
        context.emit(context.key, str(count))


if __name__ == "__main__":
    factory = Factory(DstCountMapper, DstCountReducer)
    run_task(factory, auto_serialize=False)
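
A quick local simulation of the same counting logic, no Hadoop involved (the edge list is made up):

from collections import Counter

edges = ["1 4", "3 4", "2 1"]                  # "src dst" lines
counts = Counter(line.split()[1] for line in edges)
for dst, cnt in counts.items():
    print(dst, cnt)                            # e.g. "4 2", "1 1"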
Example #3
import re
import logging

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer

logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)


class TMapper(Mapper):
    def __init__(self, ctx):
        super(TMapper, self).__init__(ctx)
        self.ctx = ctx
        LOGGER.info("Mapper instantiated")

    def map(self, ctx):
        words = re.sub('[^0-9a-zA-Z]+', ' ', ctx.value).split()
        for w in words:
            ctx.emit(w, 1)


class TReducer(Reducer):
    def __init__(self, ctx):
        super(TReducer, self).__init__(ctx)
        self.ctx = ctx
        LOGGER.info("Reducer instantiated")

    def reduce(self, ctx):
        s = sum(ctx.values)
        # Note: we explicitly write the value as a str.
        ctx.emit(ctx.key, str(s))


FACTORY = Factory(mapper_class=TMapper, reducer_class=TReducer)


def main():
    run_task(FACTORY)


if __name__ == "__main__":
    main()
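
The mapper's tokenizer collapses every run of non-alphanumeric characters into a single space before splitting; a quick standalone check:

import re

line = "Hello, world! Hello?"
print(re.sub('[^0-9a-zA-Z]+', ' ', line).split())  # ['Hello', 'world', 'Hello']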
Example #4
import struct

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class FilterMapper(Mapper):
    """
    Process a wordcount output stream, emitting only records relative to
    words whose count is equal to or above the configured threshold.
    """
    def __init__(self, context):
        super(FilterMapper, self).__init__(context)
        jc = context.job_conf
        self.threshold = jc.get_int("filter.occurrence.threshold")

    def map(self, context):
        word, occurrence = context.key, context.value
        occurrence = struct.unpack(">i", occurrence)[0]
        if occurrence >= self.threshold:
            context.emit(word, str(occurrence))


class FilterReducer(Reducer):
    def reduce(self, context):
        # Pass the filtered records through unchanged; a reduce that emits
        # nothing would silently discard all of the mapper's output.
        for v in context.values:
            context.emit(context.key, v)


if __name__ == "__main__":
    run_task(Factory(FilterMapper, FilterReducer))
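
To see what the mapper expects from its context, here is a throwaway stub (StubConf and StubContext are hypothetical helpers, not part of the Pydoop API; the FilterMapper class above is assumed to be in scope):

import struct


class StubConf(dict):
    def get_int(self, key):  # mirrors the JobConf call used above
        return int(self[key])


class StubContext(object):
    def __init__(self, conf):
        self.job_conf = conf
        self.key, self.value = None, None

    def emit(self, k, v):
        print(k, v)


ctx = StubContext(StubConf({"filter.occurrence.threshold": "5"}))
mapper = FilterMapper(ctx)
ctx.key, ctx.value = "spam", struct.pack(">i", 7)
mapper.map(ctx)  # prints "spam 7", since 7 >= 5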
Example #5

import struct
import re

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class WordCountMapper(Mapper):

    def map(self, context):
        words = re.sub('[^0-9a-zA-Z]+', ' ', context.value).split()
        for w in words:
            context.emit(w, 1)


class WordCountReducer(Reducer):

    def reduce(self, context):
        s = sum(context.values)
        context.emit(context.key, struct.pack(">i", s))


if __name__ == "__main__":
    run_task(Factory(WordCountMapper, WordCountReducer))
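
The reducer serializes each count as a big-endian 4-byte integer, which is exactly what the filter mappers in Examples #1, #4 and #6 undo with struct.unpack. The round trip in isolation:

import struct

packed = struct.pack(">i", 1234)
print(packed)                          # b'\x00\x00\x04\xd2'
print(struct.unpack(">i", packed)[0])  # 1234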
Example #6
"""
Filter out words whose occurrence falls below a specified value.
"""

import struct

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper


class FilterMapper(Mapper):
    """
    Process a wordcount output stream, emitting only records relative to
    words whose count is equal to or above the configured threshold.
    """
    def __init__(self, context):
        super(FilterMapper, self).__init__(context)
        jc = context.job_conf
        self.threshold = jc.get_int("filter.occurrence.threshold")

    def map(self, context):
        word, occurrence = context.key, context.value
        occurrence = struct.unpack(">i", occurrence)[0]
        if occurrence >= self.threshold:
            context.emit(word, str(occurrence))


if __name__ == "__main__":
    factory = Factory(FilterMapper)
    run_task(factory, raw_values=True)
Example #7

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class WordCountMapper(Mapper):
    def map(self, context):
        for w in context.value.split():
            # With auto_serialize=False, emitted values must already be
            # strings, so the count is emitted as "1".
            context.emit(w, "1")


class WordCountReducer(Reducer):
    def reduce(self, context):
        # Values arrive as strings; convert before summing.
        s = sum(int(v) for v in context.values)
        context.emit(context.key, str(s))


if __name__ == "__main__":
    factory = Factory(WordCountMapper, WordCountReducer)
    run_task(factory, auto_serialize=False)
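
With auto_serialize disabled, Pydoop hands keys and values through as text, so any arithmetic has to happen on the Python side. The reducer's summing step in isolation (sample values made up):

values = ["1", "1", "1"]            # what the reducer sees
print(sum(int(v) for v in values))  # 3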
Example #8
from pydoop.mapreduce.pipes import run_task, Factory


def main():
    # Mapper and Reducer are assumed to be the module's own subclasses,
    # defined in a part of the original file that was truncated here.
    return run_task(Factory(Mapper, Reducer, combiner_class=Reducer))
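
For reference, a minimal self-contained sketch of the pattern this fragment implies, reusing one reducer class as the combiner (class names are illustrative):

import re

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class WCMapper(Mapper):
    def map(self, context):
        for w in re.sub('[^0-9a-zA-Z]+', ' ', context.value).split():
            context.emit(w, 1)


class WCReducer(Reducer):
    def reduce(self, context):
        # Summing is associative and commutative, so the same reduce works
        # both as a combiner (partial sums) and as the final reducer.
        context.emit(context.key, sum(context.values))


def main():
    return run_task(Factory(WCMapper, WCReducer, combiner_class=WCReducer))


if __name__ == "__main__":
    main()

Because the combiner runs on map output before the shuffle, it cuts the volume of intermediate data without changing the final counts.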