Ejemplo n.º 1
0
def worker(connection_string):
    """Serve file blobs on a ZMQ REP socket until EOF and return the statistics.

    Binds a REP socket to ``tcp://<connection_string>`` and then loops:
    every received message is acknowledged and handed to ``tokenize_line``
    together with a per-worker statistics dict. Receiving the ``EOF``
    sentinel sends a final acknowledgement and ends the loop.

    Failure mode: a ``zmq.ZMQError`` raised while receiving or sending
    abandons the current blob and stops the worker; whatever statistics were
    gathered up to that point are still returned. The error is reported
    instead of being dropped silently. Errors raised by ``bind`` itself
    propagate to the caller.

    Args:
        connection_string: ``host:port`` part of the TCP endpoint to bind to.

    Returns:
        The dict passed to ``tokenize_line`` for every blob (presumably
        filled in place by it -- ``tokenize_line`` is defined elsewhere).
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    worker_local_statistics = {}
    pid = os.getpid()
    try:
        socket.bind("tcp://%s" % connection_string)
        print("Running worker process on:  %s\n" % connection_string)
        try:
            while True:
                file_blob = socket.recv_unicode()
                if file_blob == EOF:
                    print("Get EOF, shutdown worker process %s..." % pid)
                    socket.send_unicode("Get EOF, shutdown worker process %s..." % pid)
                    break
                # REP sockets must answer every request; reply first so the
                # sender is not blocked while this blob is being processed.
                socket.send_unicode("Worker process %s gets message, building index on it..." % pid)
                tokenize_line(file_blob, worker_local_statistics)
        except zmq.ZMQError as err:
            # Best effort: give up on the current blob but keep the partial
            # statistics; make the failure visible rather than hiding it.
            print("Worker process %s stopping on ZMQ error: %s" % (pid, err))
    finally:
        # Always release the socket and context, including when bind() fails.
        socket.close()
        context.term()
    print("Ending worker process... %s" % str(pid))
    return worker_local_statistics
Ejemplo n.º 2
0
def run():
    """Copy the input file to the output file, tokenizing each line.

    The input path is read from ``sys.argv[1]``; the output path from
    ``sys.argv[2]`` when given, otherwise ``DEFAULT_OUT``. Every line is
    written to the output unchanged and then passed to ``tokenize_line``.

    Raises:
        IndexError: if no input path was supplied on the command line.
        OSError: if either file cannot be opened.
    """
    filename = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUT

    # ``with`` closes both files even if writing or tokenizing raises.
    with open(filename, 'r') as in_file, open(output_file, 'w') as out_file:
        # Iterate lazily rather than loading the whole file via readlines().
        for line in in_file:
            out_file.write(line)
            tokenize_line(line)