def worker(connection_string):
    """Serve file blobs over a ZeroMQ REP socket until EOF is received.

    Binds a REP socket to ``tcp://<connection_string>`` and answers every
    request. A message equal to ``EOF`` is acknowledged and ends the loop;
    any other message is acknowledged and then handed to ``tokenize_line``
    together with this worker's statistics dict.

    Failure mode: a ``zmq.ZMQError`` raised while serving is reported and
    ends the worker; the blob being handled at that moment is dropped.
    TODO: a better recovery strategy is possible here.

    Args:
        connection_string: address part of the TCP endpoint to bind to
            (it is interpolated into ``"tcp://%s"``).

    Returns:
        The ``worker_local_statistics`` dict passed to ``tokenize_line``
        (presumably filled in place by it -- confirm against its definition).
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    worker_local_statistics = {}
    try:
        # A failed bind still propagates to the caller, but the socket and
        # context are now released by the ``finally`` below.
        socket.bind("tcp://%s" % connection_string)
        print("Running worker process on: %s\n" % connection_string)
        try:
            while True:
                file_blob = socket.recv_unicode()
                if file_blob == EOF:
                    print("Get EOF, shutdown worker process %s..." % os.getpid())
                    socket.send_unicode("Get EOF, shutdown worker process %s..." % os.getpid())
                    break
                # Reply before tokenizing: a REP socket must answer each
                # request before it can receive the next one, and the
                # sender does not have to wait for the indexing to finish.
                socket.send_unicode("Worker process %s gets message, building index on it..." % os.getpid())
                tokenize_line(file_blob, worker_local_statistics)
        except zmq.ZMQError as err:
            # Keep the original "give up on this worker" strategy, but make
            # the failure visible instead of swallowing it silently.
            print("Worker process %s stopped on ZMQ error: %s" % (os.getpid(), err))
    finally:
        # Release the endpoint so the address can be reused; term() lets
        # any queued reply flush before the context is destroyed.
        socket.close()
        context.term()
    print("Ending worker process... %s" % str(os.getpid()))
    return worker_local_statistics
def run():
    """Copy the input file to the output file, tokenizing each line.

    Command-line arguments (read from ``sys.argv``):
        1: path of the input file (required).
        2: path of the output file (optional, defaults to ``DEFAULT_OUT``).

    Raises:
        IndexError: if no input path was given on the command line.
        OSError: if either file cannot be opened.
    """
    filename = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUT
    # ``with`` guarantees both handles are closed even when writing or
    # tokenizing raises; iterating the file object streams it line by line
    # instead of loading the whole file with readlines().
    with open(filename, 'r') as in_file, open(output_file, 'w') as out_file:
        for line in in_file:
            out_file.write(line)
            # NOTE(review): worker() passes a statistics dict as a second
            # argument to tokenize_line; confirm the one-argument call here
            # is intended.
            tokenize_line(line)