Python Factory 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pydoop.pipes

메소드/함수: Factory

hotexamples.com에서의 예제들: 6

Python Factory - 6개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한 pydoop.pipes.Factory의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: wordcount-minimal.py 프로젝트: onlynone/pydoop

# License for the specific language governing permissions and limitations
# under the License.
#
# END_COPYRIGHT
"""
This example includes only the bare minimum required to run
wordcount. See wordcount-full.py for an example that uses counters,
RecordReader, etc.
"""

import pydoop.pipes as pp


class Mapper(pp.Mapper):
    """Word-count mapper: emits ``(word, "1")`` for each token of the input."""

    def map(self, context):
        """Split the input value on whitespace and emit a "1" per token."""
        for token in context.getInputValue().split():
            context.emit(token, "1")


class Reducer(pp.Reducer):
    """Word-count reducer: sums the integer values received for a key."""

    def reduce(self, context):
        """Add up every value for the current key and emit the total as text."""
        total = 0
        while True:
            if not context.nextValue():
                break
            total = total + int(context.getInputValue())
        key = context.getInputKey()
        context.emit(key, str(total))


if __name__ == "__main__":
    # Build the factory from this module's Mapper and Reducer, then hand it
    # to pp.runTask.
    factory = pp.Factory(Mapper, Reducer)
    pp.runTask(factory)

예제 #2

파일 보기

파일: wordcount-full.py 프로젝트: wtj/pydoop

        self.file.fs.close()

    def emit(self, key, value):
        """Write one ``key<sep>value`` line, newline-terminated, to the file."""
        line = "%s%s%s\n" % (key, self.sep, value)
        self.file.write(line)


class Partitioner(pp.Partitioner):
    """Partitioner that picks a reducer from the hash of the key."""

    def __init__(self, context):
        super(Partitioner, self).__init__(context)
        self.logger = logging.getLogger("Partitioner")

    def partition(self, key, numOfReduces):
        """Return the reducer index for ``key``.

        The result is ``(hash(key) & sys.maxsize) % numOfReduces``, which is
        in ``range(numOfReduces)`` for a positive ``numOfReduces``.
        """
        # Mask to a non-negative value before taking the modulus.
        # sys.maxsize is available on Python 2.6+ and Python 3, whereas
        # sys.maxint exists only on Python 2.
        reducer_id = (hash(key) & sys.maxsize) % numOfReduces
        # Pass the argument separately so the message is only formatted when
        # debug logging is enabled.
        self.logger.debug("reducer_id: %r", reducer_id)
        return reducer_id


if __name__ == "__main__":
    # Reducer is passed twice: once as the reducer and once as the combiner.
    factory = pp.Factory(
        Mapper,
        Reducer,
        record_reader_class=Reader,
        record_writer_class=Writer,
        partitioner_class=Partitioner,
        combiner_class=Reducer,
    )
    pp.runTask(factory)

# Local Variables:
# mode: python
# End:

예제 #3

파일 보기

파일: wordcount-minimal.py 프로젝트: xuande/pydoop

RecordReader, etc.
"""

import pydoop.pipes as pp
import re


class Mapper(pp.Mapper):
    """Word-count mapper that treats any non-alphanumeric run as a separator."""

    # Compiled once at class creation instead of being looked up per record.
    _NON_ALNUM = re.compile('[^0-9a-zA-Z]+')

    def __init__(self, context):
        """Initialize the base mapper and print the task context."""
        # Run the pp.Mapper initializer; skipping it leaves the base object
        # unconstructed.
        super(Mapper, self).__init__(context)
        # Single-argument parenthesized form prints the same text on
        # Python 2 (statement) and Python 3 (function).
        print(context)

    def map(self, context):
        """Emit ``(word, "1")`` for each alphanumeric token of the input."""
        cleaned = self._NON_ALNUM.sub(' ', context.getInputValue())
        for w in cleaned.split():
            context.emit(w, "1")


class Reducer(pp.Reducer):
    """Word-count reducer: sums the integer values received for a key."""

    def __init__(self, context):
        """Initialize the base reducer and print a marker line."""
        # Run the pp.Reducer initializer; skipping it leaves the base object
        # unconstructed.
        super(Reducer, self).__init__(context)
        # NOTE(review): the marker text says "Map" although this is the
        # reducer; kept unchanged in case something matches on this output.
        # The parenthesized form works on both Python 2 and Python 3.
        print("Map")

    def reduce(self, context):
        """Add up every value for the current key and emit the total as text."""
        s = 0
        while context.nextValue():
            s += int(context.getInputValue())
        context.emit(context.getInputKey(), str(s))


if __name__ == "__main__":
    # Build the factory with explicit keyword arguments, then run the task.
    factory = pp.Factory(mapper_class=Mapper, reducer_class=Reducer)
    pp.runTask(factory)

예제 #4

파일 보기

파일: phase_one.py 프로젝트: crs4/biodoop-core

def run_task():
    """Run the pipes task with Mapper and Reducer; return runTask's result."""
    factory = pp.Factory(Mapper, Reducer)
    return pp.runTask(factory)

예제 #5

파일 보기

파일: ipcount.py 프로젝트: onlynone/pydoop

        super(Mapper, self).__init__(context)
        context.setStatus("Initialization started")
        self.excluded_counter = context.getCounter("IPCOUNT", "EXCLUDED_LINES")
        jc = context.getJobConf()
        pu.jc_configure(self, jc, "ipcount.excludes", "excludes_fn", "")
        if self.excludes_fn:
            with open(self.excludes_fn) as f:
                self.excludes = set(l.strip() for l in f if not l.isspace())
        else:
            self.excludes = set()
        context.setStatus("Initialization done")

    def map(self, context):
        """Emit ``(ip, "1")`` for the first field of the record.

        The first whitespace-separated field of the input value is taken as
        the IP. If it is in ``self.excludes`` the excluded-lines counter is
        incremented instead of emitting. Blank or whitespace-only records are
        skipped.
        """
        fields = context.getInputValue().split(None, 1)
        if not fields:
            # split() returns an empty list for an empty or all-whitespace
            # string, so there is no first field to index.
            return
        ip = fields[0]
        if ip in self.excludes:
            context.incrementCounter(self.excluded_counter, 1)
        else:
            context.emit(ip, "1")


class Reducer(pp.Reducer):
    """Sums the per-key counts; also usable wherever a combiner is expected."""

    def reduce(self, context):
        """Accumulate all values for the current key and emit the sum as text."""
        count = 0
        while True:
            if not context.nextValue():
                break
            count = count + int(context.getInputValue())
        key = context.getInputKey()
        context.emit(key, str(count))


if __name__ == "__main__":
    # Reducer is also registered as the combiner.
    factory = pp.Factory(Mapper, Reducer, combiner_class=Reducer)
    pp.runTask(factory)

예제 #6

파일 보기

        super(Reader, self).__init__()
        self.isplit = pp.InputSplit(context.getInputSplit())
        self.file = hdfs.open(self.isplit.filename)
        self.file.seek(self.isplit.offset)
        self.bytes_read = 0
        if self.isplit.offset > 0:
            discarded = self.file.readline(
            )  # read by reader of previous split
            self.bytes_read += len(discarded)

    def close(self):
        """Close the input file and then the filesystem it belongs to.

        The filesystem handle is closed even if closing the file raises; the
        exception from the file close still propagates.
        """
        try:
            self.file.close()
        finally:
            self.file.fs.close()

    def next(self):
        """Read the next line-oriented record from the split.

        Returns ``(True, key, record)`` when a line was read, where ``key`` is
        ``isplit.offset + bytes_read`` (before the read) packed as a
        big-endian signed 64-bit integer. Returns ``(False, "", "")`` once
        more than ``isplit.length`` bytes have been consumed or the file is
        exhausted.
        """
        exhausted = (False, "", "")
        split = self.isplit
        # Strictly greater-than: a record that starts exactly at the split
        # length is still read by this reader.
        if self.bytes_read > split.length:
            return exhausted
        position = split.offset + self.bytes_read
        key = struct.pack(">q", position)
        line = self.file.readline()
        if line == "":
            # readline() returned nothing: end of file.
            return exhausted
        self.bytes_read += len(line)
        return (True, key, line)

    def getProgress(self):
        """Return the fraction of the split consumed so far, capped at 1.0.

        A zero-length split is reported as complete (1.0) rather than raising
        ZeroDivisionError.
        """
        split_length = self.isplit.length
        if not split_length:
            # Nothing to read, so there is no remaining work.
            return 1.0
        return min(float(self.bytes_read) / split_length, 1.0)


if __name__ == "__main__":
    # Use the custom Reader as the record reader for this task.
    factory = pp.Factory(Mapper, Reducer, record_reader_class=Reader)
    pp.runTask(factory)