def spawn_services(self):
    """
    Spawn remote services via some bootstrap method.
    """
    assert not self.service_handles, ("ERROR: services already running; "
                                      "shut them down before spawning again")

    # If the caller gave us no connection infos, default to a block of
    # DEFAULT_INSTANCE_COUNT consecutive ports on the local host.
    if self._service_infos is None:
        self._service_infos = []
        my_ip = utils.get_my_IP()
        start_port = config.get_service_port()
        for port in range(start_port,
                          start_port + self.DEFAULT_INSTANCE_COUNT):
            ci = bootstrap.ConnectionInfo(my_ip, port, None)
            self._service_infos.append(ci)

    bootstrap.ServiceHandleArray.REMOTE_CONNECTION_CLASS = \
            self._service_handler_class
    self.service_handles = bootstrap.ServiceHandleArray(self._service_infos)

    logger.debug("Attempting to start services of type %s",
                 self.service_name)
    self.service_handles.start_cmd(self.service_name)
    logger.debug("Signaled services should start")
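
# --- Example (not from the source): a minimal sketch of the default
# connection layout spawn_services builds when no _service_infos were
# provided. The IP, port, and instance count are hypothetical stand-ins
# for what utils.get_my_IP(), config.get_service_port(), and
# DEFAULT_INSTANCE_COUNT would supply.
import combtest.bootstrap as bootstrap

MY_IP = "10.0.0.5"      # assumed value of utils.get_my_IP()
START_PORT = 6000       # assumed value of config.get_service_port()
INSTANCE_COUNT = 3      # assumed DEFAULT_INSTANCE_COUNT

service_infos = [bootstrap.ConnectionInfo(MY_IP, START_PORT + idx, None)
                 for idx in range(INSTANCE_COUNT)]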
def __init__(self, connection_info):
    # If the connection info is incomplete, fall back to a fully local
    # default: this host's IP and the configured service port.
    if connection_info.port is None or connection_info.ip is None:
        connection_info.ip = utils.get_my_IP()
        connection_info.port = config.get_service_port()
    super(ServiceHandler_Local, self).__init__(connection_info)
    self._proc = None
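
# --- Example (not from the source): a ConnectionInfo with missing fields
# picks up local defaults during construction. The import path is assumed
# from the surrounding code.
import combtest.bootstrap as bootstrap

ci = bootstrap.ConnectionInfo(None, None, None)
handler = bootstrap.ServiceHandler_Local(ci)
# ci.ip is now utils.get_my_IP() and ci.port is config.get_service_port()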
def add_socket_handler(host, port):
    """
    Host and port should point back to the logging port of the test
    coordinator (that is: the machine where e.g. a ServiceGroup is
    running).
    """
    # Only attach a single socket handler per process.
    if isinstance(logger.net_logger, NetLogAdapter):
        return

    socket_handler = logging.handlers.SocketHandler(host, port)
    net_logger = logging.getLogger(BASE_LOGGER_NAME + ".net")
    # Don't bother with a formatter, since a socket handler sends the
    # event as an unformatted pickle.
    net_logger.addHandler(socket_handler)

    my_ip = get_my_IP()
    nla = NetLogAdapter(net_logger, {'my_ip': my_ip})
    logger.net_logger = nla
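
# --- Example (not from the source): a minimal sketch of the receiving end
# that add_socket_handler talks to. The stdlib SocketHandler frames each
# pickled LogRecord with a 4-byte big-endian length prefix, so a receiver
# can rebuild records with logging.makeLogRecord and re-handle them.
import logging
import pickle
import socketserver
import struct

class LogRecordHandler(socketserver.StreamRequestHandler):
    def handle(self):
        while True:
            header = self.rfile.read(4)
            if len(header) < 4:
                break
            length, = struct.unpack(">L", header)
            payload = self.rfile.read(length)
            record = logging.makeLogRecord(pickle.loads(payload))
            # Dispatch to whatever handlers the local logger has attached.
            logging.getLogger(record.name).handle(record)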
def exposed_start_remote_logging(self, ip, port, log_dir=None,
                                 log_namespace=None, verbose=0,
                                 **kwargs):
    """
    Args:
        ip, port - where we connect our logging socket handler
        log_dir - where we output our logs
    """
    my_ip = utils.get_my_IP()

    # At verbosity 2, also write a DEBUG-level log file locally.
    if verbose == 2:
        if self.verbose_file_path is None:
            central_logger.set_level(central_logger.DEBUG)
            if log_dir is not None:
                log_fname = "%s.%d.%d.log" % (str(my_ip),
                                              self._service_port,
                                              int(time.time()))
                if log_namespace is not None:
                    log_fname = log_namespace + "." + log_fname
                verbose_file_path = os.path.join(log_dir, log_fname)
                self.verbose_file_path = verbose_file_path
                central_logger.add_file_handler(verbose_file_path)

    if self.trace_file_path is None and log_dir is not None:
        central_logger.set_level(central_logger.DEBUG)
        if log_namespace is None:
            log_namespace = log_dir
        log_namespace += "%s.%d.%d.log" % (str(my_ip), self._service_port,
                                           int(time.time()))
        central_logger.add_op_trace(log_dir, WalkOpTracer, log_namespace)
        self.trace_file_path = logger.op_trace.fname

    central_logger.add_socket_handler(ip, port)
    return (self.verbose_file_path, self.trace_file_path)
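
# --- Example (not from the source): the "exposed_" prefix suggests an rpyc
# service; if so, a coordinator could start remote logging roughly like
# this. The host/port values and log directory are hypothetical.
import rpyc

conn = rpyc.connect("10.0.0.6", 6000)    # the remote service (assumed addr)
verbose_path, trace_path = conn.root.start_remote_logging(
        "10.0.0.5", 6010,                # coordinator ip and logging port
        log_dir="/var/log/combtest",
        verbose=2)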
    @classmethod
    def from_json(cls, obj):
        return cls(*obj)


class SimpleImportService(worker.CoordinatorService):
    def work_repack(self, work, state, resp=None):
        work = int(work)
        work_out = SimpleRunner(work)
        return work_out


if __name__ == "__main__":
    this_module = "combtest.test.test_worker."
    my_ip = utils.get_my_IP()

    sg = worker.ServiceGroup(this_module + SimpleImportService.__name__)

    # Test sending a batch of work
    WORK_SIZE = 100006
    print("Test: scattering %d units of work" % WORK_SIZE)
    a_bunch_of_work = range(WORK_SIZE)
    start_time = time.time()
    worker_ids = sg.scatter_work(a_bunch_of_work, state={})
    print("scatter took", time.time() - start_time)
    sg.join()

    master = {}
    start_time = time.time()
    for con, worker_id in worker_ids.items():
import random
import tempfile
import unittest

from combtest.action import OptionSet, SerialAction
import combtest.central_logger as central_logger
import combtest.encode as encode
import combtest.replay as replay
import combtest.runner as runner
import combtest.walk as walk
from combtest.utils import get_my_IP

import combtest.test.classes.actions as actions

MY_IP = get_my_IP()


def random_option(option_set):
    # random.choice needs a sequence, so materialize the option iterator.
    options = list(option_set)
    return random.choice(options)


class TestReplayByLib(unittest.TestCase):
    """
    This is pretty much just a local 'run walk' function. Let's get code
    coverage of it anyway.
    """
    def _get_random_walk(self):
        s1 = random_option(actions.SerialActionAppend1.get_option_set())
        a1 = random_option(actions.ActionAppend1.get_option_set())
        a2 = random_option(actions.ActionAppend1.get_option_set())
def run_tests(
        walk_order,
        state=None,
        verbose=1,
        logger_port=None,
        runner_class=MultistageWalkRunningService,
        service_group_class=ContinuingWalkServiceGroup,
        service_infos=None,
        service_handler_class=bootstrap.ServiceHandler_Local,
        max_thread_count=None,
        gather_states=False,
        log_dir=None,
        ):
    """
    Run a collection of :class:`combtest.walk.Walk`. This should be the
    main way to execute ``Walks`` for most users. This is the only
    interface that supports correct execution of a
    :class:`combtest.action.SerialAction`.

    You can provide some instance to serve as the state passed around
    during the tests. There are two important details to know about this:

    * The state must be JSON-ifiable, but py-combtest provides a
      convenience pattern to help with that. See :func:`encode`.
    * Shallow copies of the state will be made, via copy.copy(), since
      each test owns its own copy. You may want to e.g. override
      __copy__ if the details of the copy are important to you.

    :param iterable walk_order: An iterable of iterables which produce
            :class:`combtest.action.Action`. Example: a list of iterables
            produced by ``MyActionClass.get_option_set()``.
    :param object state: a state to copy and pass to the ``Walks`` when we
            execute them.
    :param int verbose: 0-2 verbosity setting. At 2, an additional
            verbose-level log will be produced.
    :param int logger_port: the port number where our local logger should
            accept data.
    :param combtest.worker.CoordinatorService runner_class: the type of
            Walk-execution service to use.
    :param combtest.worker.ServiceGroup service_group_class: the type of
            ``ServiceGroup`` we will use to coordinate remote executors.
    :param iterable service_infos: An iterable of any extra infos we need
            to bootstrap the remote services. See
            :class:`combtest.bootstrap.ServiceHandleArray`.
    :param combtest.bootstrap.ServiceHandler service_handler_class: Type
            of ``ServiceHandler`` to use to bootstrap the services.
    :param bool gather_states: If True or 1, gather and return all
            ``states`` from the remote services at the end of the run,
            as a mapping ip->[state, ...]. If 2, also gather extra info
            about the run of each walk, such as whether it was canceled.
            Otherwise the returned states will be None.
    :param int max_thread_count: Max number of ``Walk``-executing threads
            that each service will use.
    :param str log_dir: Directory where we will store traces, debug logs,
            etc. Remote services will also attempt to store logs to the
            same path.
    :raises RuntimeError: when remote services can't be established and
            connected to.
    :return: count of walks run, count of walk execution errors, count of
             walk segments run, total elapsed time, remote states if
             ``gather_states == True`` else None, the location of the
             master log file where applicable, and the ids of any failed
             walks.
    """
    if logger_port is None:
        logger_port = config.get_logger_port()

    if verbose == 0:
        central_logger.set_level(central_logger.WARNING)
    elif verbose == 1:
        central_logger.set_level(central_logger.INFO)
    else:
        central_logger.set_level(central_logger.DEBUG)

    my_ip = utils.get_my_IP()

    if log_dir is not None:
        central_logger.log_status("Log files will be at: %s", log_dir)
        # Used to give us some data that connects us back to the remote
        # workers, e.g. where their logs are being stored.
        central_logger.add_op_trace(log_dir, central_logger.OpTracer)
        central_logger.log_status("Log master at: %s",
                                  logger.op_trace.fname)

    # TODO? Pull files back from remote side via rpyc?
    # Set up remote logging w/ local printing.
    central_logger.start_recv_remote_logs(my_ip, logger_port)

    sg = None
    try:
        # Get the test case generator.
        wo = walk.WalkOptions(walk_order)

        # Bring up services across the cluster which can execute Walks in
        # parallel. See worker.py docs on the wiki for details about how
        # this works.
        service_qualname = utils.get_class_qualname(runner_class)
        central_logger.log_status("Bringing up services to run some tests")
        sg = service_group_class(service_qualname,
                                 service_infos=service_infos,
                                 service_handler_class=service_handler_class)

        remote_log_locations = sg.start_remote_logging(my_ip,
                                                       logger_port,
                                                       log_dir,
                                                       verbose=verbose)
        master_location = ""
        remote_logs = []
        for logs in remote_log_locations.values():
            remote_logs.extend(logs)
        if any(remote_logs):
            logger.trace_op(id='master')
            for ip, log_locations in remote_log_locations.items():
                logger.trace_op(ip=ip, logs=log_locations)
            master_location = logger.op_trace.fname
        master_log = {
            'master': master_location,
            'remote': remote_log_locations,
        }
        logger.info("Services are up")

        logger.info("Scattering work")
        start_time = time.time()
        master_worker_ids = {}
        for epoch_list in wo:
            logger.info("Epoch list has %d epochs", len(epoch_list))
            for epoch in epoch_list:
                state_copy = copy.copy(state)
                # SerialActions run here in the coordinator, once per
                # branch, before the epoch's parallel work is scattered.
                if epoch.serial_action is not None:
                    for branch_id in epoch.branch_ids:
                        state_copy = epoch.serial_action(
                                state=state_copy,
                                branch_id=branch_id,
                                epoch=epoch,
                                service=sg,
                                worker_ids=master_worker_ids)

                _, count, worker_ids = sg.scatter_work(
                        epoch, state=state_copy,
                        max_thread_count=max_thread_count)
                logger.info("Epoch of work sent; %d work items", count)

                for connection_info, ids in worker_ids.items():
                    if connection_info not in master_worker_ids:
                        master_worker_ids[connection_info] = []
                    master_worker_ids[connection_info].extend(ids)

            logger.info("Epochs started; waiting for them to finish")
            sg.join()

        logger.info("Work finished; gathering responses")

        segment_count = 0
        error_count = 0
        walk_count = 0
        # List of walk_ids
        failed_tests = []
        for connection_info, ids in master_worker_ids.items():
            if len(ids) == 0:
                # No work sent, e.g. because we didn't have many walks
                continue

            # NOTE: taking advantage of singleton
            wid = ids[0]
            wids = {connection_info: [wid]}
            current_segment_count, current_error_count, \
                    current_walk_count, current_failed_walk_ids = \
                    sg.gather_all_runner_states(wids)
            segment_count += current_segment_count
            error_count += current_error_count
            walk_count += current_walk_count
            failed_tests.extend(current_failed_walk_ids)

        elapsed = time.time() - start_time
        central_logger.log_status("Ran %d walks (%d errors) in %0.2fs" %
                                  (walk_count, error_count, elapsed))

        if gather_states is True or gather_states == 1:
            states_out = sg.gather_all_states(worker_ids, full=False)
        elif gather_states == 2:
            states_out = sg.gather_all_states(worker_ids, full=True)
        else:
            states_out = None

        if log_dir is not None:
            sg.provide_logs(log_dir)
    finally:
        central_logger.stop_recv_remote_logs()
        try:
            if sg is not None:
                sg.shutdown(hard=True)
        except Exception:
            pass

    return Result(walk_count, error_count, segment_count, elapsed,
                  states_out, master_log, failed_tests)
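
# --- Example (not from the source): a hedged sketch of driving run_tests,
# assuming hypothetical Action subclasses ActionFoo and ActionBar that
# implement get_option_set() as described in the docstring above. The
# log directory is illustrative.
walk_order = [ActionFoo.get_option_set(), ActionBar.get_option_set()]
result = run_tests(walk_order, state={}, verbose=1, gather_states=True,
                   log_dir="/tmp/combtest_logs")
# `result` is the Result tuple documented above: walk count, error count,
# segment count, elapsed time, gathered states, master log location, and
# the ids of any failed walks.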