    # Fragment from the benchmark's main block; earlier setup code creates
    # `clipper_conn`, `app_name`, and the `response` checked below.
    try:
        try:
            if response.status_code != requests.codes.ok:
                print("Error: %s" % response.text)
                raise BenchmarkException("Error creating app %s" % app_name)

            version = 1
            model = BasicNN()
            nn_model = train(model)
            deploy_and_test_model(
                clipper_conn, nn_model, version, link_model=True)

            app_and_model_name = "easy-register-app-model"
            create_endpoint(clipper_conn, app_and_model_name, "integers",
                            predict, nn_model)
            test_model(clipper_conn, app_and_model_name, 1)

        except BenchmarkException:
            # A benchmark step failed: dump Clipper state for debugging,
            # tear down the containers, and exit with an error code.
            log_clipper_state(clipper_conn)
            logger.exception("BenchmarkException")
            clipper_conn = create_docker_connection(
                cleanup=True, start_clipper=False)
            sys.exit(1)
        else:
            # All steps passed: clean up the Clipper containers.
            clipper_conn = create_docker_connection(
                cleanup=True, start_clipper=False)
    except Exception:
        # Any other failure (for example, while starting Clipper itself).
        logger.exception("Exception")
        clipper_conn = create_docker_connection(
            cleanup=True, start_clipper=False)
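# The fragment above depends on helpers defined elsewhere in the test module
# (BasicNN, train, predict, create_endpoint, BenchmarkException, the Docker
# connection utilities, and the logger). As a rough, hypothetical illustration
# of the shapes involved (not the test's actual definitions), a model-side
# sketch might look like the following; the input dimension, architecture, and
# placeholder training step are all assumptions. In stock Clipper, a deployed
# predict function receives a batch (list) of inputs and must return one
# string per input.
import numpy as np
import torch
import torch.nn as nn

INPUT_DIM = 10  # assumed length of each integer input vector


class BasicNN(nn.Module):
    def __init__(self):
        super(BasicNN, self).__init__()
        self.fc = nn.Linear(INPUT_DIM, 2)

    def forward(self, x):
        return self.fc(x.float())


def train(model):
    # Placeholder: a real benchmark would fit the model on sample data here.
    return model


def predict(model, inputs):
    # inputs: list of numpy integer arrays; return one string per input.
    preds = []
    for x in inputs:
        t = torch.from_numpy(np.asarray(x, dtype=np.float32))
        with torch.no_grad():
            out = model(t)
        preds.append(str(out.argmax().item()))
    return preds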
def frontend(args):
    print(args)
    batch_size = args.batch_size
    num_models = args.num_models
    num_redundant_models = args.num_redundant_models
    func = predict
    red_func = predict

    # Map the redundancy-mode name onto the integer code the frontend expects.
    if args.redundancy_mode == "none" or args.redundancy_mode == "equal":
        redundancy_mode = 0
    elif args.redundancy_mode == "coded":
        redundancy_mode = 2
    elif args.redundancy_mode == "cheap":
        redundancy_mode = 3

    if args.queue_mode == "single_queue":
        queue_mode = 0
        single_queue = True
    else:
        # Round robin
        queue_mode = 1
        single_queue = False

    assert len(args.f_ips) == num_models + num_redundant_models
    if len(args.f_ports) != len(args.f_ips):
        # A single port may be given and reused for every worker.
        assert len(args.f_ports) == 1
        args.f_ports *= len(args.f_ips)

    # The first num_models (ip, port) pairs host the original models; the
    # remainder host the redundant models.
    model_instance_ip_port = []
    red_model_instance_ip_port = []
    for i in range(len(args.f_ips)):
        if i < num_models:
            model_instance_ip_port.append(
                (args.f_ips[i], int(args.f_ports[i])))
        else:
            red_model_instance_ip_port.append(
                (args.f_ips[i], int(args.f_ports[i])))

    client_ip_port = []
    if len(args.f_client_ports) != len(args.f_client_ips):
        assert len(args.f_client_ports) == 1
        args.f_client_ports *= len(args.f_client_ips)
    client_ip_port = [
        (ip, int(port))
        for ip, port in zip(args.f_client_ips, args.f_client_ports)
    ]

    cm = DistributedParmDockerContainerManager(
        model_instance_ip_port=model_instance_ip_port,
        red_model_instance_ip_port=red_model_instance_ip_port,
        client_ip_port=client_ip_port)
    clipper_conn = ClipperConnection(cm, distributed=True)

    frontend_args = {
        "redundancy_mode": redundancy_mode,
        "queue_mode": queue_mode,
        "num_models": num_models,
        "num_redundant_models": num_redundant_models,
        "batch_size": batch_size,
        "mode": args.f_mode
    }

    clipper_conn.start_clipper(frontend_args=frontend_args)

    # Coded redundancy feeds the redundant models float inputs; otherwise they
    # take the same byte inputs as the originals.
    if args.redundancy_mode == "coded":
        red_input_type = "floats"
    else:
        red_input_type = "bytes"

    pytorch_deployer.create_endpoint(
        clipper_conn=clipper_conn,
        name="example",
        input_type="bytes",
        func=func,
        pytorch_model=model,
        pkgs_to_install=['pillow'],
        num_replicas=num_models,
        batch_size=batch_size,
        num_red_replicas=num_redundant_models,
        red_func=red_func,
        red_input_type=red_input_type,
        red_pytorch_model=red_model,
        prefer_original=False,
        slo_micros=10000000 * 10)

    sleep_time = 5
    print("Sleeping for", sleep_time, "seconds to let things start up")
    time.sleep(sleep_time)

    # Run the clients to completion and record the total experiment time.
    total_time = cm.run_clients()
    print(total_time)
    with open(args.f_outfile, 'w') as outfile:
        outfile.write("{:.4f}".format(total_time))

    clipper_conn.stop_all()
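# frontend() above only reads attributes off the args namespace: batch_size,
# num_models, num_redundant_models, redundancy_mode, queue_mode, f_ips,
# f_ports, f_client_ips, f_client_ports, f_mode, and f_outfile. A minimal
# sketch of an argparse parser that would produce a compatible namespace is
# given below; the flag spellings, defaults, and choices are assumptions, not
# necessarily the repository's actual CLI.
import argparse


def make_frontend_parser():
    parser = argparse.ArgumentParser(
        description="Launch the distributed frontend experiment")
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_models", type=int, required=True)
    parser.add_argument("--num_redundant_models", type=int, default=0)
    parser.add_argument("--redundancy_mode", default="none",
                        choices=["none", "equal", "coded", "cheap"])
    parser.add_argument("--queue_mode", default="single_queue",
                        choices=["single_queue", "rr"])
    parser.add_argument("--f_ips", nargs="+", required=True,
                        help="IP address of each model-serving worker")
    parser.add_argument("--f_ports", nargs="+", default=["7000"],
                        help="One port per worker, or a single port for all")
    parser.add_argument("--f_client_ips", nargs="+", required=True)
    parser.add_argument("--f_client_ports", nargs="+", default=["7010"])
    parser.add_argument("--f_mode", default="")
    parser.add_argument("--f_outfile", default="frontend_total_time.txt")
    return parser


if __name__ == "__main__":
    frontend(make_frontend_parser().parse_args())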
def frontend(args):
    print(args)
    batch_size = args.batch_size
    num_models = args.num_models
    func = predict
    red_func = predict

    # The background workload never runs with redundancy.
    assert args.redundancy_mode in ["none", "equal"]
    redundancy_mode = 0

    if args.queue_mode == "single_queue":
        queue_mode = 0
    elif args.queue_mode == "rr":
        queue_mode = 1
    else:
        assert False, "Unrecognized queue mode '{}'".format(args.queue_mode)

    model_instance_ip_port = []
    red_model_instance_ip_port = []
    cur_port = base_port
    if num_models < len(args.f_ips):
        # Round up to highest int so as not to launch more models than needed.
        num_between = int(len(args.f_ips) / num_models + 0.5)
        chosen_indices = list(range(0, len(args.f_ips), num_between))
        print("Range is", chosen_indices)

        # Shift our chosen indices so that they are evenly distributed
        # throughout the clients.
        delta = len(args.f_ips) - chosen_indices[-1]
        shift = delta // 2
        if len(args.f_ips) == 15:
            shift += 1
        chosen_indices = [i + shift for i in chosen_indices]
        print("Shifted range is", chosen_indices)
        for i in chosen_indices:
            model_instance_ip_port.append((args.f_ips[i], cur_port))
    else:
        for i in range(num_models):
            model_instance_ip_port.append(
                (args.f_ips[i % len(args.f_ips)], cur_port))

            # Wrap around to the next port number if we will need to repeat
            # workers.
            if i % len(args.f_ips) == len(args.f_ips) - 1:
                cur_port += 1

    print("Model instance ip, port:", model_instance_ip_port)

    client_ip_port = []
    if len(args.f_client_ports) != len(args.f_client_ips):
        assert len(args.f_client_ports) == 1
        args.f_client_ports *= len(args.f_client_ips)
    client_ip_port = [(ip, int(port))
                      for ip, port in zip(args.f_client_ips,
                                          args.f_client_ports)]

    cm = DistributedParmDockerContainerManager(
        model_instance_ip_port=model_instance_ip_port,
        red_model_instance_ip_port=red_model_instance_ip_port,
        client_ip_port=client_ip_port)
    clipper_conn = ClipperConnection(cm, distributed=True)

    frontend_args = {
        "redundancy_mode": redundancy_mode,
        "queue_mode": queue_mode,
        "num_models": num_models,
        "num_redundant_models": 0,
        "batch_size": batch_size,
        "mode": args.f_mode,
    }

    clipper_conn.start_clipper(frontend_args=frontend_args)

    red_input_type = "bytes"
    pytorch_deployer.create_endpoint(
        clipper_conn=clipper_conn,
        name="bg",
        input_type="bytes",
        func=func,
        pytorch_model=model,
        pkgs_to_install=['pillow'],
        num_replicas=num_models,
        batch_size=batch_size,
        num_red_replicas=0,
        red_func=red_func,
        red_input_type=red_input_type,
        red_pytorch_model=red_model,
        prefer_original=False,
        slo_micros=10000000 * 10)

    sleep_time = 5
    print("Sleeping for", sleep_time, "seconds to let things start up")
    time.sleep(sleep_time)

    # Launch the background clients without blocking on them.
    cm.run_clients(wait=False)

    # Listen on a Unix domain socket to determine when we should quit.
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.bind('/home/ubuntu/bg_sock')
    sock.listen(5)
    (clientsocket, address) = sock.accept()
    print("Stopping all clients")
    cm.stop_all_clients()
    print("Sending response")
    clientsocket.sendall('1'.encode())
    clientsocket.close()
    sock.close()

    print("Stopping all")
    clipper_conn.stop_all()
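# The background frontend above blocks on a Unix domain socket at
# /home/ubuntu/bg_sock, stops its clients once a peer connects, and then
# replies with a single byte before shutting everything down. A minimal
# sketch of that controlling peer, assumed to run on the same host, is shown
# below; only the socket path and the one-byte acknowledgement come from the
# code above, the rest is illustrative.
import socket

BG_SOCK_PATH = "/home/ubuntu/bg_sock"


def stop_background_frontend():
    """Signal the background frontend to stop its clients and wait for its ack."""
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(BG_SOCK_PATH)

    # The frontend stops its clients as soon as the connection is accepted,
    # then sends b'1' once that shutdown has completed.
    ack = sock.recv(1)
    sock.close()
    return ack == b"1"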