Example 1
def test_args_default_resource_limits(test_microvm_with_initrd):
    """
    Test the default resource limits are correctly set by the jailer.

    @type: security
    """
    test_microvm = test_microvm_with_initrd

    test_microvm.spawn()

    # Get firecracker's PID
    pid = int(test_microvm.jailer_clone_pid)
    assert pid != 0

    # Fetch firecracker process limits for number of open fds
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_NOFILE)
    # Check that the default limit was set.
    assert soft == 2048
    assert hard == 2048

    # Fetch firecracker process limits for the maximum file size
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_FSIZE)
    # Check that no limit was set
    assert soft == -1
    assert hard == -1
Example 2
    def __enter__(self):
        if self._has_entered:
            raise RuntimeError(
                "You can not use the same AsyncEvaluator in two different contexts."
            )
        self._has_entered = True

        self._input = self._queue_manager.Queue()
        self._output = self._queue_manager.Queue()

        log.debug(
            f"Process {self._main_process.pid} starting {self._n_jobs} subprocesses."
        )
        for _ in range(self._n_jobs):
            mp_process = multiprocessing.Process(
                target=evaluator_daemon,
                args=(self._input, self._output, AsyncEvaluator.defaults),
                daemon=True,
            )
            mp_process.start()

            subprocess = psutil.Process(mp_process.pid)
            self._processes.append(subprocess)

            if resource and AsyncEvaluator.memory_limit_mb:
                limit = AsyncEvaluator.memory_limit_mb * (2**20)
                resource.prlimit(subprocess.pid, resource.RLIMIT_AS,
                                 (limit, limit))

        self._log_memory_usage()
        return self
Example 3
def proc_rlimit(
        proc: "Process",
        res: int,
        new_limits: Optional[Tuple[int, int]] = None) -> Tuple[int, int]:
    if new_limits is None:
        return (resource.prlimit(  # pylint: disable=no-member  # pytype: disable=missing-parameter
            proc.pid, res))
    else:
        return resource.prlimit(proc.pid, res, new_limits)  # pylint: disable=no-member
Example 4
def check_limits(pid, no_file, fsize):
    """Verify resource limits against expected values."""
    # Fetch firecracker process limits for number of open fds
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_NOFILE)
    assert soft == no_file
    assert hard == no_file

    # Fetch firecracker process limits for maximum file size
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_FSIZE)
    assert soft == fsize
    assert hard == fsize
Example 5
def test_sigxfsz_handler(test_microvm_with_api):
    """
    Test intercepting and handling SIGXFSZ.

    @type: functional
    """
    microvm = test_microvm_with_api
    microvm.spawn()

    # We don't need to monitor the memory for this test.
    microvm.memory_monitor = None

    # We need to use the Sync file engine type. If we use io_uring we will not
    # get a SIGXFSZ. We'll instead get an errno 27 File too large as the
    # completed entry status code.
    microvm.basic_config(rootfs_io_engine="Sync")

    # Configure metrics based on a file.
    metrics_path = os.path.join(microvm.path, 'metrics_fifo')
    utils.run_cmd("touch {}".format(metrics_path))
    response = microvm.metrics.put(
        metrics_path=microvm.create_jailed_resource(metrics_path)
    )
    assert microvm.api_session.is_status_no_content(response.status_code)

    microvm.start()

    metrics_jail_path = os.path.join(microvm.jailer.chroot_path(),
                                     metrics_path)
    metrics_fd = open(metrics_jail_path, encoding='utf-8')
    line_metrics = metrics_fd.readlines()
    assert len(line_metrics) == 1

    firecracker_pid = int(microvm.jailer_clone_pid)
    size = os.path.getsize(metrics_jail_path)
    # SIGXFSZ is triggered because the rootfs is larger than three times the
    # current size of the metrics file. Since the metrics file is flushed
    # twice, the limit has to be larger than those two flushes so that the
    # SIGXFSZ metric can still be logged.
    res.prlimit(firecracker_pid, res.RLIMIT_FSIZE, (size*3, res.RLIM_INFINITY))

    while True:
        try:
            utils.run_cmd("ps -p {}".format(firecracker_pid))
            sleep(1)
        except ChildProcessError:
            break

    microvm.expect_kill_by_signal = True
    msg = 'Shutting down VM after intercepting signal 25, code 0'
    microvm.check_log_message(msg)
    metric_line = json.loads(metrics_fd.readlines()[0])
    assert metric_line["signals"]["sigxfsz"] == 1
Example 6
def proc_rlimit(
        pid: int,
        res: int,
        new_limits: Optional[Tuple[int, int]] = None) -> Tuple[int, int]:
    if pid == 0:
        # prlimit() treats pid=0 specially.
        # psutil doesn't, so we don't either.
        raise ProcessLookupError

    if new_limits is None:
        return resource.prlimit(  # pytype: disable=missing-parameter  # pylint: disable=no-member
            pid, res)
    else:
        return resource.prlimit(pid, res, new_limits)  # pylint: disable=no-member
Example 7
def limit_resources(timeout, pid=None):
    """Apply resource limit given by ``--memout`` and timeout arguments."""
    if pid:
        setlimit = lambda *args: resource.prlimit(pid, *args)  # noqa: E731
    else:
        setlimit = lambda *args: resource.setrlimit(*args)  # noqa: E731

    if options.args().memout:
        setlimit(resource.RLIMIT_AS,
                 (options.args().memout * 1024 * 1024, resource.RLIM_INFINITY))
    if timeout:
        timeout = math.ceil(timeout)
        setlimit(resource.RLIMIT_CPU, (timeout, timeout))
Example 8
def run_rnasubopt(ifname, deltaenergy, number=None):
    errs = []
    rna_args = ['nice', args.rnasubopt, '-e', str(deltaenergy), '-i', ifname]
    safety_args = ['nice', args.trivialsafety]
    if number is not None:
        safety_args.append('-num')
        safety_args.append(str(number))

    #print(' '.join(rna_args), '|', ' '.join(safety_args))

    rna = psutil.Popen(rna_args,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
    safety = psutil.Popen(safety_args,
                          stdin=rna.stdout,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    rna.stdout.close()

    if args.timeout > 0:
        resource.prlimit(
            rna.pid, resource.RLIMIT_CPU,
            (int(args.timeout * 60 * 60), int(args.timeout * 60 * 60)))

    folddata = None
    try:
        folddata = json.loads(safety.stdout.read().decode('utf-8'))
    except json.decoder.JSONDecodeError as e:
        errs.append('{}: Failed to decode trivial safety output: {}'.format(
            ifname, e))

    rnaerr = rna.stderr.read()
    if rnaerr is not None and len(rnaerr) > 0:
        errs.append('RNAsubopt for {} returned errors:\n{}'.format(
            ifname, rnaerr))
    safetyerr = safety.stderr.read()
    if safetyerr is not None and len(safetyerr) > 0:
        errs.append('Trivial safety for {} returned errors:\n{}'.format(
            ifname, safetyerr))

    rpid, rstatus, rres = os.wait4(rna.pid, 0)
    #print('RNA:    Status: {}, user (s): {:5.1f}, sys (s): {:5.1f}, maxrss (kB): {:6d}'.format(
    #    rstatus, rres.ru_utime, rres.ru_stime, rres.ru_maxrss))
    spid, sstatus, sres = os.wait4(safety.pid, 0)
    #print('Safety: Status: {}, user (s): {:5.1f}, sys (s): {:5.1f}, maxrss (kB): {:6d}'.format(
    #    sstatus, sres.ru_utime, sres.ru_stime, sres.ru_maxrss))

    if os.WIFSIGNALED(rstatus) and not (number is not None and
                                        os.WTERMSIG(rstatus) == signal.SIGPIPE):
        errs.append('{}: RNAsubopt was terminated with signal {}'.format(
            ifname, os.WTERMSIG(rstatus)))
        return (None, errs)
    if os.WIFSIGNALED(sstatus):
        errs.append('{}: Trivialsafety was terminated with signal {}'.format(
            ifname, os.WTERMSIG(sstatus)))
        return (None, errs)
    if folddata is None:
        errs.append('{}: folddata was None for unknown reason'.format(ifname))
        return (None, errs)

    folddata['Command'] = ' '.join(rna_args) + ' | ' + ' '.join(safety_args)
    folddata['Resources'] = {
        'RNAsuboptUser': rres.ru_utime,
        'RNAsuboptSys': rres.ru_stime,
        'RNAsuboptRSS': rres.ru_maxrss,
        'TrivialSafetyUser': sres.ru_utime,
        'TrivialSafetySys': sres.ru_stime,
        'TrivialSafetyRSS': sres.ru_maxrss,
    }
    return (folddata, errs)
Example 9
def test_api_machine_config(test_microvm_with_api):
    """
    Test /machine_config PUT/PATCH scenarios that unit tests can't cover.

    @type: functional
    """
    test_microvm = test_microvm_with_api
    test_microvm.spawn()

    # Test invalid vcpu count < 0.
    response = test_microvm.machine_cfg.put(vcpu_count='-2')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    # Test invalid type for ht_enabled flag.
    response = test_microvm.machine_cfg.put(ht_enabled='random_string')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    # Test invalid CPU template.
    response = test_microvm.machine_cfg.put(cpu_template='random_string')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    response = test_microvm.machine_cfg.patch(track_dirty_pages=True)
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    response = test_microvm.machine_cfg.patch(cpu_template='C3')
    if platform.machine() == "x86_64":
        assert test_microvm.api_session.is_status_no_content(
            response.status_code)
    else:
        assert test_microvm.api_session.is_status_bad_request(
            response.status_code)
        assert "CPU templates are not supported on aarch64" in response.text

    # Test invalid mem_size_mib < 0.
    response = test_microvm.machine_cfg.put(mem_size_mib='-2')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    # Test invalid mem_size_mib > usize::MAX.
    bad_size = 1 << 64
    response = test_microvm.machine_cfg.put(mem_size_mib=bad_size)
    fail_msg = "error occurred when deserializing the json body of a " \
               "request: invalid type"
    assert test_microvm.api_session.is_status_bad_request(response.status_code)
    assert fail_msg in response.text

    # Test mem_size_mib of valid type, but too large.
    test_microvm.basic_config()
    firecracker_pid = int(test_microvm.jailer_clone_pid)
    resource.prlimit(firecracker_pid, resource.RLIMIT_AS,
                     (MEM_LIMIT, resource.RLIM_INFINITY))

    bad_size = (1 << 64) - 1
    response = test_microvm.machine_cfg.patch(mem_size_mib=bad_size)
    assert test_microvm.api_session.is_status_no_content(response.status_code)

    response = test_microvm.actions.put(action_type='InstanceStart')
    fail_msg = "Invalid Memory Configuration: MmapRegion(Mmap(Os { code: " \
               "12, kind: Other, message: Out of memory }))"
    assert test_microvm.api_session.is_status_bad_request(response.status_code)
    assert fail_msg in response.text

    # Test invalid mem_size_mib = 0.
    response = test_microvm.machine_cfg.patch(mem_size_mib=0)
    assert test_microvm.api_session.is_status_bad_request(response.status_code)
    assert "The memory size (MiB) is invalid." in response.text

    # Test valid mem_size_mib.
    response = test_microvm.machine_cfg.patch(mem_size_mib=256)
    assert test_microvm.api_session.is_status_no_content(response.status_code)

    response = test_microvm.actions.put(action_type='InstanceStart')
    if utils.get_cpu_vendor() != utils.CpuVendor.INTEL:
        # We shouldn't be able to apply Intel templates on AMD hosts
        fail_msg = "Internal error while starting microVM: Error configuring" \
                   " the vcpu for boot: Cpuid error: InvalidVendor"
        assert test_microvm.api_session.is_status_bad_request(
            response.status_code)
        assert fail_msg in response.text
    else:
        assert test_microvm.api_session.is_status_no_content(
            response.status_code)

    # Validate full vm configuration after patching machine config.
    response = test_microvm.full_cfg.get()
    assert test_microvm.api_session.is_status_ok(response.status_code)
    assert response.json()['machine-config']['vcpu_count'] == 2
    assert response.json()['machine-config']['mem_size_mib'] == 256
Example 10
    async def start(self):

        exe_dir = path.join(conf.get('home'), 'bin')
        exe_path = path.join(exe_dir, 'jamovi-engine')

        env = os.environ.copy()
        env['R_HOME'] = conf.get('r_home', env.get('R_HOME', ''))
        env['R_LIBS'] = conf.get('r_libs', env.get('R_LIBS', ''))
        env['FONTCONFIG_PATH'] = conf.get('fontconfig_path',
                                          env.get('FONTCONFIG_PATH', ''))
        env['JAMOVI_MODULES_PATH'] = conf.get(
            'modules_path', env.get('JAMOVI_MODULES_PATH', ''))

        con = '--con={}'.format(self._conn_path)
        pth = '--path={}'.format(self._data_path)

        try:
            if platform.uname().system == 'Windows':
                si = subprocess.STARTUPINFO()
                # makes the engine windows visible in debug mode (on windows)
                if not conf.get('debug', False):
                    si.dwFlags |= subprocess.STARTF_USESHOWWINDOW

                self._process = subprocess.Popen(
                    [exe_path, con, pth],
                    startupinfo=si,
                    stdout=None,  # stdouts seem to break things on windows
                    stderr=None,
                    env=env)
            else:
                # stdin=PIPE, because the engines use the closing of
                # stdin to terminate themselves.
                self._process = await create_subprocess_exec(
                    exe_path,
                    con,
                    pth,
                    stdout=None,
                    stderr=None,
                    stdin=subprocess.PIPE,
                    env=env)

            mem_limit = conf.get('memory_limit_engine', None)
            if mem_limit:
                if platform.uname().system == 'Linux':
                    import resource
                    try:
                        limit = int(mem_limit) * 1024 * 1024  # MB
                        resource.prlimit(self._process.pid, resource.RLIMIT_AS,
                                         (limit, limit))
                    except ValueError:
                        raise ValueError('memory_limit_engine: bad value')
                else:
                    raise ValueError(
                        'memory_limit_engine is unavailable on systems other than linux'
                    )

            if self._monitor is not None:
                self._monitor.monitor(self._process)

            self._socket = nanomsg.Socket(nanomsg.PAIR)
            self._socket._set_recv_timeout(500)
            self._socket.bind(self._conn_path)

            # need a separate thread for nanomsg :/
            self._thread = threading.Thread(target=self._run)
            self._thread.start()

        except Exception as e:
            log.exception(e)
            self._parent._notify_engine_event({
                'type': 'error',
                'message': 'Engine process could not be started',
                'cause': str(e),
            })
Example 11
def run(args):
    # Set a relatively low cap on max open sessions, so we can saturate it in a reasonable amount of time
    args.max_open_sessions = 100
    args.max_open_sessions_hard = args.max_open_sessions + 20

    # Chunk often, so that new fds are regularly requested
    args.ledger_chunk_bytes = "500B"

    with infra.network.network(args.nodes,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid

        initial_fds = psutil.Process(primary_pid).num_fds()
        assert (
            initial_fds < args.max_open_sessions
        ), f"Initial number of file descriptors has already reached session limit: {initial_fds} >= {args.max_open_sessions}"

        num_fds = initial_fds
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        def create_connections_until_exhaustion(target,
                                                continue_to_hard_cap=False):
            with contextlib.ExitStack() as es:
                clients = []
                LOG.success(f"Creating {target} clients")
                consecutive_failures = 0
                for i in range(target):
                    logs = []
                    try:
                        clients.append(
                            es.enter_context(
                                primary.client("user0", connection_timeout=1)))
                        r = clients[-1].post(
                            "/app/log/private",
                            {
                                "id": 42,
                                "msg": "foo"
                            },
                            log_capture=logs,
                        )
                        if r.status_code == http.HTTPStatus.OK:
                            check(
                                r,
                                result=True,
                            )
                            consecutive_failures = 0
                        elif r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE:
                            if continue_to_hard_cap:
                                consecutive_failures = 0
                                continue
                            raise RuntimeError(r.body.text())
                        else:
                            flush_info(logs)
                            raise ValueError(
                                f"Unexpected response status code: {r.status_code}"
                            )
                    except (CCFConnectionException, RuntimeError) as e:
                        flush_info(logs)
                        LOG.warning(f"Hit exception at client {i}: {e}")
                        clients.pop(-1)
                        if consecutive_failures < 5:
                            # Maybe got unlucky and tried to create a session while many files were open - keep trying
                            consecutive_failures += 1
                            continue
                        else:
                            # Ok you've really hit a wall, stop trying to create clients
                            break
                else:
                    raise AllConnectionsCreatedException(
                        f"Successfully created {target} clients without exception - expected this to exhaust available connections"
                    )

                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
                )

                # Submit many requests, and at least enough to trigger additional snapshots
                more_requests = max(
                    len(clients) * 3, args.snapshot_tx_interval * 2)
                LOG.info(
                    f"Submitting an additional {more_requests} requests from existing clients"
                )
                for _ in range(more_requests):
                    client = random.choice(clients)
                    logs = []
                    try:
                        client.post(
                            "/app/log/private",
                            {
                                "id": 42,
                                "msg": "foo"
                            },
                            timeout=1,
                            log_capture=logs,
                        )
                    except Exception as e:
                        flush_info(logs)
                        LOG.error(e)
                        raise e

                time.sleep(1)
                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
                )

                LOG.info("Disconnecting clients")
                clients = []

            time.sleep(1)
            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            return num_fds

        # For initial safe tests, we have many more fds than the maximum sessions, so file operations should still succeed even when network is saturated
        max_fds = args.max_open_sessions + (initial_fds * 2)
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE,
                         (max_fds, max_fds))
        LOG.success(
            f"Setting max fds to safe initial value {max_fds} on {primary_pid}"
        )

        nb_conn = (max_fds - num_fds) * 2
        num_fds = create_connections_until_exhaustion(nb_conn)

        to_create = max_fds - num_fds + 1
        num_fds = create_connections_until_exhaustion(to_create)

        try:
            create_connections_until_exhaustion(to_create, True)
        except AllConnectionsCreatedException as e:
            # This is fine! The soft cap means this test no longer reaches the hard cap.
            # It gets HTTP errors but then _closes_ sockets, fast enough that we never hit the hard cap
            pass

        # Now set a low fd limit, so network sessions completely exhaust them - expect this to cause failures
        max_fds = args.max_open_sessions // 2
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE,
                         (max_fds, max_fds))
        LOG.success(
            f"Setting max fds to dangerously low {max_fds} on {primary_pid}")

        try:
            num_fds = create_connections_until_exhaustion(to_create)
        except Exception as e:
            LOG.warning(
                f"Node with only {max_fds} fds crashed when allowed to created {args.max_open_sessions} sessions, as expected"
            )
            LOG.warning(e)
            network.ignore_errors_on_shutdown()
        else:
            raise RuntimeError("Expected a fatal crash and saw none!")
Example 12
def run(args):
    hosts = ["localhost"] * (4 if args.consensus == "pbft" else 1)

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid
        num_fds = psutil.Process(primary_pid).num_fds()
        max_fds = num_fds + 150
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE,
                         (max_fds, max_fds))
        LOG.success(f"set max fds to {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        clients = []

        with contextlib.ExitStack() as es:
            LOG.success(f"Creating {nb_conn} clients")
            for i in range(nb_conn):
                try:
                    clients.append(es.enter_context(primary.client("user0")))
                    LOG.info(f"Created client {i}")
                except OSError:
                    LOG.error(f"Failed to create client {i}")

            # Creating clients may not actually create connections/fds. Send messages until we run out of fds
            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {
                    "id": 42,
                    "msg": "foo"
                }),
                      result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(
            f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

        with contextlib.ExitStack() as es:
            to_create = max_fds - num_fds + 1
            LOG.success(f"Creating {to_create} clients")
            for i in range(to_create):
                clients.append(es.enter_context(primary.client("user0")))
                LOG.info(f"Created client {i}")

            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {
                    "id": 42,
                    "msg": "foo"
                }),
                      result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(
            f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
Example 13
def run(args):
    # Set a relatively low cap on max open sessions, so we can saturate it in a reasonable amount of time
    args.max_open_sessions = 40
    args.max_open_sessions_hard = args.max_open_sessions + 5

    # Listen on additional RPC interfaces with even lower session caps
    for i, node_spec in enumerate(args.nodes):
        additional_args = []
        caps = interface_caps(i)
        for address, cap in caps.items():
            additional_args.append(f"--rpc-interface={address},,{cap}")
        node_spec.additional_raw_node_args = additional_args

    # Chunk often, so that new fds are regularly requested
    args.ledger_chunk_bytes = "500B"

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        caps = interface_caps(primary.local_node_id)

        primary_pid = primary.remote.remote.proc.pid

        initial_fds = psutil.Process(primary_pid).num_fds()
        assert (
            initial_fds < args.max_open_sessions
        ), f"Initial number of file descriptors has already reached session limit: {initial_fds} >= {args.max_open_sessions}"

        num_fds = initial_fds
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        initial_metrics = get_session_metrics(primary)
        assert initial_metrics["active"] <= initial_metrics["peak"], initial_metrics
        main_session_metrics = initial_metrics["interfaces"][
            f"{primary.rpc_host}:{primary.rpc_port}"
        ]
        assert (
            main_session_metrics["soft_cap"] == args.max_open_sessions
        ), initial_metrics
        assert (
            main_session_metrics["hard_cap"] == args.max_open_sessions_hard
        ), initial_metrics

        max_fds = args.max_open_sessions + (initial_fds * 2)

        def create_connections_until_exhaustion(
            target, continue_to_hard_cap=False, client_fn=primary.client
        ):
            with contextlib.ExitStack() as es:
                clients = []
                LOG.success(f"Creating {target} clients")
                consecutive_failures = 0
                i = 1
                healthy_clients = []
                while i <= target:
                    logs = []
                    try:
                        clients.append(
                            es.enter_context(
                                client_fn(
                                    identity="user0",
                                    connection_timeout=1,
                                    limits=httpx.Limits(
                                        max_connections=1,
                                        max_keepalive_connections=1,
                                        keepalive_expiry=30,
                                    ),
                                )
                            )
                        )
                        r = clients[-1].post(
                            "/app/log/private",
                            {"id": 42, "msg": "foo"},
                            log_capture=logs,
                        )
                        if r.status_code == http.HTTPStatus.OK:
                            check(
                                r,
                                result=True,
                            )
                            consecutive_failures = 0
                            i += 1
                            healthy_clients.append(clients[-1])
                        elif r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE:
                            if continue_to_hard_cap:
                                consecutive_failures = 0
                                i += 1
                                continue
                            raise RuntimeError(r.body.text())
                        else:
                            flush_info(logs)
                            raise ValueError(
                                f"Unexpected response status code: {r.status_code}"
                            )
                    except (CCFConnectionException, RuntimeError) as e:
                        flush_info(logs)
                        LOG.warning(f"Hit exception at client {i}/{target}: {e}")
                        clients.pop(-1)
                        if consecutive_failures < 5:
                            # Maybe got unlucky and tried to create a session while many files were open - keep trying
                            consecutive_failures += 1
                            continue
                        else:
                            # Ok you've really hit a wall, stop trying to create clients
                            break
                else:
                    raise AllConnectionsCreatedException(
                        f"Successfully created {target} clients without exception - expected this to exhaust available connections"
                    )

                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
                )
                r = clients[0].get("/node/metrics")
                assert r.status_code == http.HTTPStatus.OK, r.status_code
                peak_metrics = r.body.json()["sessions"]
                assert peak_metrics["active"] <= peak_metrics["peak"], peak_metrics
                assert peak_metrics["active"] == len(healthy_clients), (
                    peak_metrics,
                    len(healthy_clients),
                )

                # Submit many requests, and at least enough to trigger additional snapshots
                more_requests = max(len(clients) * 3, args.snapshot_tx_interval * 2)
                LOG.info(
                    f"Submitting an additional {more_requests} requests from existing clients"
                )
                for _ in range(more_requests):
                    client = random.choice(healthy_clients)
                    logs = []
                    try:
                        client.post(
                            "/app/log/private",
                            {"id": 42, "msg": "foo"},
                            timeout=1,
                            log_capture=logs,
                        )
                    except Exception as e:
                        flush_info(logs)
                        LOG.error(e)
                        raise e

                time.sleep(1)
                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
                )

                LOG.info("Disconnecting clients")
                clients = []

            time.sleep(1)
            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            return num_fds

        # For initial safe tests, we have many more fds than the maximum sessions, so file operations should still succeed even when network is saturated
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(f"Setting max fds to safe initial value {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        num_fds = create_connections_until_exhaustion(nb_conn)

        to_create = max_fds - num_fds + 1
        num_fds = create_connections_until_exhaustion(to_create)

        # Check that lower caps are enforced on each interface
        for i, (address, cap) in enumerate(caps.items()):
            create_connections_until_exhaustion(
                cap + 1,
                client_fn=functools.partial(primary.client, interface_idx=i + 1),
            )

        try:
            create_connections_until_exhaustion(to_create, True)
        except AllConnectionsCreatedException:
            # This is fine! The soft cap means this test no longer reaches the hard cap.
            # It gets HTTP errors but then _closes_ sockets, fast enough that we never hit the hard cap
            pass

        final_metrics = get_session_metrics(primary)
        assert final_metrics["active"] <= final_metrics["peak"], final_metrics
        assert final_metrics["peak"] > initial_metrics["peak"], (
            initial_metrics,
            final_metrics,
        )
        assert final_metrics["peak"] >= args.max_open_sessions, final_metrics
        assert final_metrics["peak"] < args.max_open_sessions_hard, final_metrics

        # Now set a low fd limit, so network sessions completely exhaust them - expect this to cause failures
        max_fds = args.max_open_sessions // 2
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(f"Setting max fds to dangerously low {max_fds} on {primary_pid}")

        try:
            num_fds = create_connections_until_exhaustion(to_create)
        except Exception as e:
            LOG.warning(
                f"Node with only {max_fds} fds crashed when allowed to created {args.max_open_sessions} sessions, as expected"
            )
            LOG.warning(e)
            network.ignore_errors_on_shutdown()
        else:
            LOG.warning("Expected a fatal crash and saw none!")
Example 14
import resource
import os
import sys

FD_LIMIT = int(sys.argv[1])

resource.prlimit(os.getpid(), resource.RLIMIT_NOFILE, (FD_LIMIT, FD_LIMIT))

try:
    fptr = open("hello.txt", "r")
    print(fptr.readlines())
    fptr.close()
except:
    pass
Example 15
    async def start(self):

        self._at_startup = True
        self._process_stopping = threading.Event()
        self._process_abandoned = threading.Event()

        if self._socket is not None:
            try:
                self._socket.close()
            except Exception as e:
                log.exception(e)
            self._socket = None

        self._conn_path = f'{self._conn_root}-{self._parent._next_conn_index}'
        self._parent._next_conn_index += 1

        bin_dir = 'bin' if platform.system() != 'Darwin' else 'MacOS'
        exe_dir = path.join(self._config.get('home'), bin_dir)
        exe_path = path.join(exe_dir, 'jamovi-engine')

        env = os.environ.copy()
        env['R_HOME'] = self._config.get('r_home', env.get('R_HOME', ''))
        env['R_LIBS'] = self._config.get('r_libs', env.get('R_LIBS', ''))
        env['FONTCONFIG_PATH'] = self._config.get('fontconfig_path', env.get('FONTCONFIG_PATH', ''))
        env['JAMOVI_MODULES_PATH'] = self._config.get('modules_path', env.get('JAMOVI_MODULES_PATH', ''))

        if platform.uname().system == 'Linux':
            # plotting under linux sometimes doesn't work without this
            env['LC_ALL'] = 'en_US.UTF-8'
            # https://github.com/jamovi/jamovi/issues/801
            # https://github.com/jamovi/jamovi/issues/831

        con = '--con={}'.format(self._conn_path)
        pth = '--path={}'.format(self._data_path)

        try:
            if platform.uname().system == 'Windows':
                si = subprocess.STARTUPINFO()
                # makes the engine windows visible in debug mode (on windows)
                if not self._config.get('debug', False):
                    si.dwFlags |= subprocess.STARTF_USESHOWWINDOW

                self._process = subprocess.Popen(
                    [ exe_path, con, pth ],
                    startupinfo=si,
                    stdout=None,  # stdouts seem to break things on windows
                    stderr=None,
                    env=env)
            else:
                # stdin=PIPE, because the engines use the closing of
                # stdin to terminate themselves.
                self._process = await create_subprocess_exec(
                    exe_path, con, pth,
                    stdout=None,
                    stderr=None,
                    stdin=subprocess.PIPE,
                    env=env)

            mem_limit = self._config.get('memory_limit_engine', None)
            if mem_limit:
                if platform.uname().system == 'Linux':
                    import resource
                    try:
                        limit = int(mem_limit) * 1024 * 1024  # MB
                        resource.prlimit(self._process.pid, resource.RLIMIT_AS, (limit, limit))
                    except ValueError:
                        raise ValueError('memory_limit_engine: bad value')
                else:
                    raise ValueError('memory_limit_engine is unavailable on systems other than linux')

            if self._monitor is not None:
                self._monitor.monitor(self._process)

            self._socket = nanomsg.Socket(nanomsg.PAIR)
            self._socket._set_recv_timeout(500)
            self._socket.bind(self._conn_path)

            # need a separate thread for nanomsg :/
            self._thread = threading.Thread(target=self._run_loop, args=(
                self._socket,
                self._process,
                self._process_stopping,
                self._process_abandoned))
            self._thread.start()

            self._stopped.clear()
            self._running.set()

        except Exception as e:
            log.exception(e)
            self._parent._notify_engine_event({
                'type': 'error',
                'message': 'Engine process could not be started',
                'cause': str(e),
            })
Example 16
def run(args):
    hosts = ["localhost"]

    with infra.ccf.network(hosts,
                           args.binary_dir,
                           args.debug_nodes,
                           args.perf_nodes,
                           pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, others = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid
        num_fds = psutil.Process(primary_pid).num_fds()
        max_fds = num_fds + 50
        LOG.info(f"{primary_pid} has {num_fds} open file descriptors")

        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE,
                         (max_fds, max_fds))
        LOG.info(f"set max fds to {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        clients = []

        with contextlib.ExitStack() as es:
            for i in range(nb_conn):
                try:
                    clients.append(
                        es.enter_context(primary.user_client(format="json")))
                    LOG.info(f"Connected client {i}")
                except OSError:
                    LOG.error(f"Failed to connect client {i}")

            c = clients[int(random.random() * len(clients))]
            check(c.rpc("LOG_record", {"id": 42, "msg": "foo"}), result=True)

            assert (
                len(clients) >= max_fds - num_fds - 1
            ), f"{len(clients)}, expected at least {max_fds - num_fds - 1}"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
            LOG.info(f"Disconnecting clients")

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.info(f"{primary_pid} has {num_fds} open file descriptors")

        clients = []
        with contextlib.ExitStack() as es:
            for i in range(max_fds - num_fds):
                clients.append(
                    es.enter_context(primary.user_client(format="json")))
                LOG.info(f"Connected client {i}")

            c = clients[int(random.random() * len(clients))]
            check(c.rpc("LOG_record", {"id": 42, "msg": "foo"}), result=True)

            assert (
                len(clients) >= max_fds - num_fds - 1
            ), f"{len(clients)}, expected at least {max_fds - num_fds - 1}"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
            LOG.info(f"Disconnecting clients")

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
Example 17
def execute(logger,
            args,
            env=None,
            cwd=None,
            timeout=0.1,
            collect_all_stdout=False,
            filter_func=None,
            enforce_limitations=False,
            cpu_time_limit=450,
            memory_limit=1000000000):
    cmd = args[0]
    logger.debug('Execute:\n{0}{1}{2}'.format(
        cmd, '' if len(args) == 1 else ' ',
        ' '.join('"{0}"'.format(arg) for arg in args[1:])))

    if enforce_limitations:
        soft_time, hard_time = resource.getrlimit(resource.RLIMIT_CPU)
        soft_mem, hard_mem = resource.getrlimit(resource.RLIMIT_AS)
        logger.debug(
            'Got the following limitations: CPU time = {}s, memory = {}B'.
            format(cpu_time_limit, memory_limit))

    p = subprocess.Popen(args,
                         env=env,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         cwd=cwd)
    if enforce_limitations:
        resource.prlimit(p.pid, resource.RLIMIT_CPU,
                         [cpu_time_limit, hard_time])
        resource.prlimit(p.pid, resource.RLIMIT_AS, [memory_limit, hard_mem])

    out_q, err_q = (StreamQueue(p.stdout, 'STDOUT', collect_all_stdout),
                    StreamQueue(p.stderr, 'STDERR', True))

    for stream_q in (out_q, err_q):
        stream_q.start()

    # Every `timeout` seconds, print to the logs everything written to STDOUT and STDERR. A last
    # iteration is required to print the messages queued just before the command finishes.
    last_try = True
    while not out_q.finished or not err_q.finished or last_try:
        if out_q.traceback:
            raise RuntimeError(
                'STDOUT reader thread failed with the following traceback:\n{0}'
                .format(out_q.traceback))
        if err_q.traceback:
            raise RuntimeError(
                'STDERR reader thread failed with the following traceback:\n{0}'
                .format(err_q.traceback))
        last_try = not out_q.finished or not err_q.finished
        time.sleep(timeout)

        for stream_q in (out_q, err_q):
            output = []
            while True:
                line = stream_q.get()
                if line is None:
                    break
                output.append(line)
            if output:
                m = '"{0}" outputted to {1}:\n{2}'.format(
                    cmd, stream_q.stream_name, '\n'.join(output))
                if stream_q is out_q:
                    logger.debug(m)
                else:
                    logger.warning(m)

    for stream_q in (out_q, err_q):
        stream_q.join()

    if p.poll():
        logger.error('"{0}" exited with "{1}"'.format(cmd, p.poll()))
        with open('problem desc.txt', 'a', encoding='utf-8') as fp:
            out = filter(filter_func,
                         err_q.output) if filter_func else err_q.output
            fp.write('\n'.join(out))
        sys.exit(1)
    elif collect_all_stdout:
        return out_q.output
Example 18
    return nbr_a, nbr_d


@chrono(1)
def fight_n(nbr_a, nbr_d, nbr):
    sum_a, sum_d = [], []
    for _ in range(nbr):
        a, d = fight(nbr_a, nbr_d)
        sum_a.append(a)
        sum_d.append(d)
    return statistics.mean(sum_a), statistics.mean(sum_d)


@parallel
def a_fight(nbr_a, nbr_d, q):
    q.put(fight(nbr_a, nbr_d))


@chrono(1)
def a_fight_n(nbr_a, nbr_d, nbr):
    q = mp.Queue(nbr)
    for _ in range(nbr):
        a_fight(nbr_a, nbr_d, q)
    return statistics.mean(q.get()[0] for _ in range(nbr))


if __name__ == '__main__':
    # prlimit() requires at least a pid and a resource; pid 0 targets the
    # current process, and RLIMIT_NOFILE here is an illustrative choice.
    print(resource.prlimit(0, resource.RLIMIT_NOFILE))
    print(a_fight_n(300, 100, 500))
    print(fight_n(300, 100, 500))
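
All of the snippets above go through the same standard-library call, resource.prlimit(pid, resource[, limits]), which is Linux-only and available since Python 3.4: called with two arguments it queries another process's limits, and called with a (soft, hard) pair it installs new limits and returns the old ones. Below is a minimal, self-contained sketch of both uses; the "sleep" child process and the limit values are illustrative, not taken from any of the examples above.

import resource
import subprocess

# Spawn a child process to act on; "sleep" stands in for any long-running target.
child = subprocess.Popen(["sleep", "30"])

# Without the limits argument, prlimit() is a pure query of the child's limits.
soft, hard = resource.prlimit(child.pid, resource.RLIMIT_NOFILE)
print(f"fd limits before: soft={soft}, hard={hard}")

# With a (soft, hard) pair, prlimit() installs the new limits and returns the
# limits that were in effect before the call.
previous = resource.prlimit(child.pid, resource.RLIMIT_NOFILE, (64, hard))
print(f"limits returned by the setting call: {previous}")
print(f"limits after the change: {resource.prlimit(child.pid, resource.RLIMIT_NOFILE)}")

child.terminate()
child.wait()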