def execute(self, frame: FrameType) -> bytes: raise ScannerException('Test')
def test_fault_tolerance(fault_db): force_kill_spawn_port = 5012 normal_spawn_port = 5013 def worker_killer_task(config, master_address): from scannerpy import ProtobufGenerator, Config, start_worker import time import grpc import subprocess import signal import os c = Config(None) import scanner.metadata_pb2 as metadata_types import scanner.engine.rpc_pb2 as rpc_types import scanner.types_pb2 as misc_types import scannerpy.libscanner as bindings protobufs = ProtobufGenerator(config) # Spawn a worker that we will force kill script_dir = os.path.dirname(os.path.realpath(__file__)) with open(os.devnull, 'w') as fp: p = subprocess.Popen( [ 'python ' + script_dir + '/spawn_worker.py {:d}'.format(force_kill_spawn_port) ], shell=True, stdout=fp, stderr=fp, preexec_fn=os.setsid) # Wait a bit for the worker to do its thing time.sleep(10) # Force kill worker process to trigger fault tolerance os.killpg(os.getpgid(p.pid), signal.SIGTERM) p.kill() p.communicate() # Wait for fault tolerance to kick in time.sleep(15) # Spawn the worker again subprocess.call( [ 'python ' + script_dir + '/spawn_worker.py {:d}'.format(normal_spawn_port) ], shell=True) master_addr = fault_db._master_address killer_process = Process( target=worker_killer_task, args=(fault_db.config, master_addr)) killer_process.daemon = True killer_process.start() frame = fault_db.ops.FrameInput() range_frame = frame.sample() sleep_frame = fault_db.ops.SleepFrame(ignore=range_frame) output_op = fault_db.ops.Output(columns=[sleep_frame]) job = Job(op_args={ frame: fault_db.table('test1').column('frame'), range_frame: fault_db.sampler.range(0, 20), output_op: 'test_fault', }) bulk_job = BulkJob(output=output_op, jobs=[job]) table = fault_db.run( bulk_job, pipeline_instances_per_node=1, force=True, show_progress=False) table = table[0] assert len([_ for _, _ in table.column('dummy').load()]) == 20 # Shutdown the spawned worker channel = grpc.insecure_channel( 'localhost:' + str(normal_spawn_port), options=[('grpc.max_message_length', 24499183 * 2)]) worker = fault_db.protobufs.WorkerStub(channel) try: worker.Shutdown(fault_db.protobufs.Empty()) except grpc.RpcError as e: status = e.code() if status == grpc.StatusCode.UNAVAILABLE: print('could not shutdown worker!') exit(1) else: raise ScannerException('Worker errored with status: {}' .format(status)) killer_process.join()
def test_fault_tolerance(fault_cl): force_kill_spawn_port = 5012 normal_spawn_port = 5013 def worker_killer_task(config, master_address): from scannerpy import Config, start_worker, protobufs import time import grpc import subprocess import signal import os import scanner.metadata_pb2 as metadata_types import scanner.engine.rpc_pb2 as rpc_types import scanner.types_pb2 as misc_types # Spawn a worker that we will force kill script_dir = os.path.dirname(os.path.realpath(__file__)) with open(os.devnull, 'w') as fp: p = subprocess.Popen([ 'python3 ' + script_dir + '/spawn_worker.py {:d}'.format(force_kill_spawn_port) ], shell=True, stdout=fp, stderr=fp, preexec_fn=os.setsid) # Wait a bit for the worker to do its thing time.sleep(10) # Force kill worker process to trigger fault tolerance os.killpg(os.getpgid(p.pid), signal.SIGTERM) p.kill() p.communicate() # Wait for fault tolerance to kick in time.sleep(15) # Spawn the worker again subprocess.call([ 'python3 ' + script_dir + '/spawn_worker.py {:d}'.format(normal_spawn_port) ], shell=True) master_addr = fault_cl._master_address killer_process = Process(target=worker_killer_task, args=(fault_cl.config, master_addr)) killer_process.daemon = True killer_process.start() input = NamedVideoStream(fault_cl, 'test1') frame = fault_cl.io.Input([input]) range_frame = fault_cl.streams.Range(frame, ranges=[{ 'start': 0, 'end': 20 }]) sleep_frame = fault_cl.ops.SleepFrame(ignore=range_frame) output = NamedStream(fault_cl, 'test_fault') output_op = fault_cl.io.Output(sleep_frame, [output]) fault_cl.run(output_op, PerfParams.estimate(pipeline_instances_per_node=1), cache_mode=CacheMode.Overwrite, show_progress=False) assert output.len() == 20 # Shutdown the spawned worker channel = grpc.insecure_channel('localhost:' + str(normal_spawn_port), options=[('grpc.max_message_length', 24499183 * 2)]) worker = protobufs.WorkerStub(channel) try: worker.Shutdown(protobufs.Empty()) except grpc.RpcError as e: status = e.code() if status == grpc.StatusCode.UNAVAILABLE: print('could not shutdown worker!') exit(1) else: raise ScannerException( 'Worker errored with status: {}'.format(status)) killer_process.join()