Example #1
def short_kes_start_cluster(tmp_path_factory: TempdirFactory) -> Path:
    """Update *slotsPerKESPeriod* and *maxKESEvolutions*."""
    pytest_globaltemp = helpers.get_pytest_globaltemp(tmp_path_factory)

    # need to lock because this same fixture can run on several workers in parallel
    with helpers.FileLockIfXdist(
            f"{pytest_globaltemp}/startup_files_short_kes.lock"):
        destdir = pytest_globaltemp / "startup_files_short_kes"
        destdir.mkdir(exist_ok=True)

        # return existing script if it was already generated by another worker
        destdir_ls = list(destdir.glob("start-cluster*"))
        if destdir_ls:
            return destdir_ls[0]

        startup_files = cluster_nodes.get_cluster_type(
        ).cluster_scripts.copy_scripts_files(destdir=destdir)
        with open(startup_files.genesis_spec) as fp_in:
            genesis_spec = json.load(fp_in)

        genesis_spec["slotsPerKESPeriod"] = 700
        genesis_spec["maxKESEvolutions"] = 5

        with open(startup_files.genesis_spec, "w") as fp_out:
            json.dump(genesis_spec, fp_out)

        return startup_files.start_script
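
All of the examples on this page funnel through the same `helpers.FileLockIfXdist` context manager, whose implementation is not shown here. A minimal sketch of what such a helper could look like, assuming it wraps the `filelock` package and detects pytest-xdist workers via the `PYTEST_XDIST_WORKER` environment variable (both details are assumptions for illustration):

import contextlib
import os
from typing import ContextManager

from filelock import FileLock


def file_lock_if_xdist(lock_path: str) -> ContextManager:
    """Return an inter-process lock under pytest-xdist, a no-op lock otherwise."""
    if os.environ.get("PYTEST_XDIST_WORKER"):
        # several workers may run the locked code at once => take a real
        # filesystem lock that is shared by all worker processes
        return FileLock(lock_path)
    # single-process run => there is nothing to synchronize against
    return contextlib.nullcontext()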
Example #2
    def set_needs_restart(self) -> None:
        """Indicate that the cluster needs restart."""
        with helpers.FileLockIfXdist(self.cluster_lock):
            self._log(
                f"c{self.cluster_instance_num}: called `set_needs_restart`")
            open(self.instance_dir / f"{RESTART_NEEDED_GLOB}_{self.worker_id}",
                 "a").close()
Example #3
def epoch_length_start_cluster(tmp_path_factory: TempdirFactory) -> Path:
    """Update *epochLength* to 1200."""
    pytest_globaltemp = helpers.get_pytest_globaltemp(tmp_path_factory)

    # need to lock because this same fixture can run on several workers in parallel
    with helpers.FileLockIfXdist(
            f"{pytest_globaltemp}/startup_files_epoch_1500.lock"):
        destdir = pytest_globaltemp / "startup_files_epoch_1500"
        destdir.mkdir(exist_ok=True)

        # return existing script if it was already generated by another worker
        destdir_ls = list(destdir.glob("start-cluster*"))
        if destdir_ls:
            return destdir_ls[0]

        startup_files = cluster_nodes.get_cluster_type(
        ).cluster_scripts.copy_scripts_files(destdir=destdir)
        with open(startup_files.genesis_spec) as fp_in:
            genesis_spec = json.load(fp_in)

        genesis_spec["epochLength"] = 1500

        with open(startup_files.genesis_spec, "w") as fp_out:
            json.dump(genesis_spec, fp_out)

        return startup_files.start_script
Example #4
def return_funds_to_faucet(
    *src_addrs: clusterlib.AddressRecord,
    cluster_obj: clusterlib.ClusterLib,
    faucet_addr: str,
    amount: int = -1,
    tx_name: Optional[str] = None,
    destination_dir: FileType = ".",
) -> None:
    """Send `amount` from all `src_addrs` to `faucet_addr`.

    The amount of "-1" means all available funds.
    """
    tx_name = tx_name or helpers.get_timestamped_rand_str()
    tx_name = f"{tx_name}_return_funds"
    with helpers.FileLockIfXdist(f"{helpers.get_basetemp()}/{faucet_addr}.lock"):
        try:
            logging.disable(logging.ERROR)
            for src in src_addrs:
                fund_dst = [clusterlib.TxOut(address=faucet_addr, amount=amount)]
                fund_tx_files = clusterlib.TxFiles(signing_key_files=[src.skey_file])
                # try to return funds; don't mind if there aren't enough funds for fees etc.
                try:
                    cluster_obj.send_funds(
                        src_address=src.address,
                        destinations=fund_dst,
                        tx_name=tx_name,
                        tx_files=fund_tx_files,
                        destination_dir=destination_dir,
                    )
                except Exception:
                    pass
        finally:
            logging.disable(logging.NOTSET)
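
Keying the lock on the faucet address makes concurrent workers return funds one at a time, so they don't build competing transactions from the same faucet UTxOs. A hypothetical call site, where `payment_addrs`, `cluster` and `faucet_address` are placeholders created elsewhere in a test:

return_funds_to_faucet(
    *payment_addrs,
    cluster_obj=cluster,
    faucet_addr=faucet_address,
    tx_name="test_cleanup",
)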
Example #5
def add_ignore_rule(files_glob: str, regex: str) -> None:
    """Add ignore rule for expected errors."""
    with helpers.FileLockIfXdist(f"{helpers.get_basetemp()}/ignore_rules.lock"):
        state_dir = cluster_nodes.get_cluster_env().state_dir
        rules_file = state_dir / ERRORS_RULES_FILE_NAME
        with open(rules_file, "a") as infile:
            infile.write(f"{files_glob};;{regex}\n")
Example #6
def update_params(
    cluster_obj: clusterlib.ClusterLib,
    src_addr_record: clusterlib.AddressRecord,
    update_proposals: List[UpdateProposal],
) -> None:
    """Update params using update proposal."""
    _cli_args = [(u.arg, str(u.value)) for u in update_proposals]
    cli_args = list(itertools.chain.from_iterable(_cli_args))

    with helpers.FileLockIfXdist(f"{helpers.get_basetemp()}/update_params.lock"):
        LOGGER.info("Waiting for new epoch to submit proposal.")
        cluster_obj.wait_for_new_epoch()

        cluster_obj.submit_update_proposal(
            cli_args=cli_args,
            src_address=src_addr_record.address,
            src_skey_file=src_addr_record.skey_file,
            tx_name=helpers.get_timestamped_rand_str(),
        )

        LOGGER.info(f"Update Proposal submitted ({cli_args})")
        cluster_obj.wait_for_new_epoch()

        protocol_params = cluster_obj.get_protocol_params()
        for u in update_proposals:
            # TODO: handle nested dictionaries
            if not u.name:
                continue
            updated_value = protocol_params[u.name]
            if str(updated_value) != str(u.value):
                raise AssertionError(
                    f"Cluster update proposal failed! Param value for {u.name}: {updated_value}.\n"
                    f"Expected: {u.value}\n"
                    f"Tip: {cluster_obj.get_tip()}"
                )
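
`update_params` only accesses `arg`, `value` and `name` on each `UpdateProposal`, so a minimal record consistent with that usage could look like the sketch below (the real definition may carry more fields):

from typing import Any, NamedTuple


class UpdateProposal(NamedTuple):
    arg: str        # CLI argument, e.g. "--decentralization-parameter"
    value: Any      # value submitted in the update proposal
    name: str = ""  # key in the protocol params output; empty => skip verification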
Example #7
    def _log(self, msg: str) -> None:
        """Log message."""
        if not self.manager_log.is_file():
            return
        with helpers.FileLockIfXdist(self.log_lock):
            with open(self.manager_log, "a") as logfile:
                logfile.write(
                    f"{datetime.datetime.now()} on {self.worker_id}: {msg}\n")
Example #8
def add_ignore_rule(files_glob: str, regex: str) -> None:
    """Add ignore rule for expected errors."""
    with helpers.FileLockIfXdist(f"{helpers.TEST_TEMP_DIR}/ignore_rules.lock"):
        cluster_env = devops_cluster.get_cluster_env()
        state_dir = cluster_env["state_dir"]
        rules_file = state_dir / ERRORS_RULES_FILE_NAME
        with open(rules_file, "a") as infile:
            infile.write(f"{files_glob};;{regex}\n")
Example #9
def cluster_cleanup(tmp_path_factory: TempdirFactory, worker_id: str,
                    request: FixtureRequest) -> Generator[None, None, None]:
    pytest_tmp_dir = Path(tmp_path_factory.getbasetemp())

    if not worker_id or worker_id == "master":
        # if the cluster was started outside of the test framework, do nothing
        if cluster_management.DEV_CLUSTER_RUNNING:
            # TODO: check that socket is open and print error if not
            yield
            return

        yield

        cluster_manager_obj = cluster_management.ClusterManager(
            tmp_path_factory=tmp_path_factory,
            worker_id=worker_id,
            pytest_config=request.config)
        cluster_manager_obj.save_worker_cli_coverage()
        _stop_all_cluster_instances(
            tmp_path_factory=tmp_path_factory,
            worker_id=worker_id,
            pytest_config=request.config,
            pytest_tmp_dir=pytest_tmp_dir,
        )
        return

    lock_dir = pytest_tmp_dir = pytest_tmp_dir.parent

    # pylint: disable=consider-using-with
    open(lock_dir / f".started_session_{worker_id}", "a").close()

    yield

    with helpers.FileLockIfXdist(
            f"{lock_dir}/{cluster_management.CLUSTER_LOCK}"):
        cluster_manager_obj = cluster_management.ClusterManager(
            tmp_path_factory=tmp_path_factory,
            worker_id=worker_id,
            pytest_config=request.config)
        cluster_manager_obj.save_worker_cli_coverage()

        os.remove(lock_dir / f".started_session_{worker_id}")
        if not list(lock_dir.glob(".started_session_*")):
            _stop_all_cluster_instances(
                tmp_path_factory=tmp_path_factory,
                worker_id=worker_id,
                pytest_config=request.config,
                pytest_tmp_dir=pytest_tmp_dir,
            )
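
The `.started_session_<worker_id>` files implement a simple reference count: each xdist worker registers itself when its session starts, deregisters when it ends, and whichever worker removes the last file also stops the cluster instances. The same pattern stripped down to its core (names are illustrative):

from pathlib import Path


def register_worker(lock_dir: Path, worker_id: str) -> None:
    """Mark this worker's session as started."""
    (lock_dir / f".started_session_{worker_id}").touch()


def deregister_worker(lock_dir: Path, worker_id: str) -> bool:
    """Remove this worker's marker; return True if it was the last worker."""
    (lock_dir / f".started_session_{worker_id}").unlink()
    return not list(lock_dir.glob(".started_session_*"))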
Example #10
def search_cluster_artifacts() -> List[Tuple[Path, str]]:
    """Search cluster artifacts for errors."""
    state_dir = cluster_nodes.get_cluster_env().state_dir
    rules_file = state_dir / ERRORS_RULES_FILE_NAME

    with helpers.FileLockIfXdist(
            f"{helpers.get_basetemp()}/ignore_rules.lock"):
        ignore_rules = get_ignore_rules(rules_file)

    errors = []
    for logfile in state_dir.glob("*.std*"):
        # skip if the log file is a status file or a rotated log
        if logfile.name.endswith(".offset") or ROTATED_RE.match(logfile.name):
            continue

        # read seek offset (from where to start searching) and timestamp of last search
        offset_file = logfile.parent / f".{logfile.name}.offset"
        if offset_file.exists():
            seek = _get_seek(offset_file)
            timestamp = os.path.getmtime(offset_file)
        else:
            seek = 0
            timestamp = 0.0

        errors_ignored = get_ignore_regex(ignore_rules=ignore_rules,
                                          regexes=ERRORS_IGNORED,
                                          logfile=logfile)
        errors_ignored_re = re.compile(errors_ignored)

        # record offset for the "live" log file
        with open(offset_file, "w") as outfile:
            outfile.write(str(helpers.get_eof_offset(logfile)))

        for logfile_rec in get_rotated_logs(logfile=logfile,
                                            seek=seek,
                                            timestamp=timestamp):
            with open(logfile_rec.logfile) as infile:
                infile.seek(seek)
                for line in infile:
                    if ERRORS_RE.search(line) and not (
                            errors_ignored and errors_ignored_re.search(line)):
                        errors.append((logfile, line))

    return errors
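
The offset file written above holds a single integer (the end-of-file offset recorded after the previous search), so the `_get_seek` helper only has to read it back. A sketch assuming that format:

from pathlib import Path


def _get_seek(offset_file: Path) -> int:
    """Return the stored seek offset, or 0 if the file content is unusable."""
    try:
        return int(offset_file.read_text().strip())
    except ValueError:
        return 0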
Example #11
def cluster_cleanup(tmp_path_factory: TempdirFactory, worker_id: str,
                    request: FixtureRequest) -> Generator:
    pytest_tmp_dir = Path(tmp_path_factory.getbasetemp())

    if not worker_id or worker_id == "master":
        yield
        cluster_manager_obj = parallel_run.ClusterManager(
            tmp_path_factory=tmp_path_factory,
            worker_id=worker_id,
            pytest_config=request.config)
        cluster_manager_obj.save_worker_cli_coverage()
        _stop_all_cluster_instances(
            tmp_path_factory=tmp_path_factory,
            worker_id=worker_id,
            pytest_config=request.config,
            pytest_tmp_dir=pytest_tmp_dir,
        )
        return

    lock_dir = pytest_tmp_dir = pytest_tmp_dir.parent

    open(lock_dir / f".started_session_{worker_id}", "a").close()

    yield

    with helpers.FileLockIfXdist(f"{lock_dir}/{parallel_run.CLUSTER_LOCK}"):
        cluster_manager_obj = parallel_run.ClusterManager(
            tmp_path_factory=tmp_path_factory,
            worker_id=worker_id,
            pytest_config=request.config)
        cluster_manager_obj.save_worker_cli_coverage()

        os.remove(lock_dir / f".started_session_{worker_id}")
        if not list(lock_dir.glob(".started_session_*")):
            _stop_all_cluster_instances(
                tmp_path_factory=tmp_path_factory,
                worker_id=worker_id,
                pytest_config=request.config,
                pytest_tmp_dir=pytest_tmp_dir,
            )
Example #12
    def on_test_stop(self) -> None:
        """Perform actions after the test finished."""
        if self._cluster_instance_num == -1:
            return

        with helpers.FileLockIfXdist(self.cluster_lock):
            self._log(f"c{self.cluster_instance_num}: called `on_test_stop`")

            # remove resource locking files created by the worker
            resource_locking_files = list(
                self.instance_dir.glob(
                    f"{RESOURCE_LOCKED_GLOB}_*_{self.worker_id}"))
            for f in resource_locking_files:
                os.remove(f)

            # remove "resource in use" files created by the worker
            resource_in_use_files = list(
                self.instance_dir.glob(
                    f"{RESOURCE_IN_USE_GLOB}_*_{self.worker_id}"))
            for f in resource_in_use_files:
                os.remove(f)

            # remove file that indicates that a test is running on the worker
            try:
                os.remove(self.instance_dir /
                          f"{TEST_RUNNING_GLOB}_{self.worker_id}")
            except FileNotFoundError:
                pass

            # remove file that indicates the test was singleton
            try:
                os.remove(self.instance_dir / TEST_SINGLETON_FILE)
            except FileNotFoundError:
                pass

            # search for errors in cluster logfiles
            errors = logfiles.search_cluster_artifacts()
            if errors:
                logfiles.report_artifacts_errors(errors)
Example #13
    def get(  # noqa: C901
        self,
        singleton: bool = False,
        mark: str = "",
        lock_resources: UnpackableSequence = (),
        use_resources: UnpackableSequence = (),
        cleanup: bool = False,
        start_cmd: str = "",
    ) -> clusterlib.ClusterLib:
        """Return the `clusterlib.ClusterLib` instance once we can start the test.

        It checks current conditions and waits if the conditions don't allow the test
        to start right away.
        """
        # pylint: disable=too-many-statements,too-many-branches,too-many-locals

        # don't start a new cluster if it was already started outside of the test framework
        if DEV_CLUSTER_RUNNING:
            if start_cmd:
                LOGGER.warning(
                    f"Ignoring the '{start_cmd}' cluster start command as "
                    "'DEV_CLUSTER_RUNNING' is set.")
            return self._reuse_dev_cluster()

        if FORBID_RESTART and start_cmd:
            raise RuntimeError(
                "Cannot use custom start command when 'FORBID_RESTART' is set."
            )

        selected_instance = -1
        restart_here = False
        restart_ready = False
        first_iteration = True
        sleep_delay = 1
        marked_tests_cache: Dict[int, MarkedTestsStatus] = {}

        if start_cmd:
            if not (singleton or mark):
                raise AssertionError(
                    "Custom start command can be used only together with `singleton` or `mark`"
                )
            # always clean up after test(s) that started the cluster with a custom configuration
            cleanup = True

        # iterate until it is possible to start the test
        while True:
            if restart_ready:
                self._restart(start_cmd=start_cmd)

            if not first_iteration:
                helpers.xdist_sleep(random.random() * sleep_delay)

            # nothing time-consuming can go under this lock, as it would block all other workers
            with helpers.FileLockIfXdist(self.cm.cluster_lock):
                test_on_worker = list(
                    self.cm.lock_dir.glob(
                        f"{CLUSTER_DIR_TEMPLATE}*/{TEST_RUNNING_GLOB}_{self.cm.worker_id}"
                    ))

                # test is already running, nothing to set up
                if (first_iteration and test_on_worker
                        and self.cm._cluster_instance_num != -1
                        and self.cm.cache.cluster_obj):
                    self.cm._log(f"{test_on_worker[0]} already exists")
                    return self.cm.cache.cluster_obj

                first_iteration = False  # needs to be set here, before the first `continue`
                self.cm._cluster_instance_num = -1

                # try all existing cluster instances
                for instance_num in range(self.cm.num_of_instances):
                    # if instance to run the test on was already decided, skip all other instances
                    # pylint: disable=consider-using-in
                    if selected_instance != -1 and instance_num != selected_instance:
                        continue

                    instance_dir = self.cm.lock_dir / f"{CLUSTER_DIR_TEMPLATE}{instance_num}"
                    instance_dir.mkdir(exist_ok=True)

                    # if the selected instance failed to start, move on to other instance
                    if (instance_dir / CLUSTER_DEAD_FILE).exists():
                        selected_instance = -1
                        restart_here = False
                        restart_ready = False
                        # remove status files that are checked by other workers
                        for sf in (
                                *instance_dir.glob(f"{TEST_CURR_MARK_GLOB}_*"),
                                *instance_dir.glob(
                                    f"{TEST_MARK_STARTING_GLOB}_*"),
                        ):
                            os.remove(sf)

                        dead_clusters = list(
                            self.cm.lock_dir.glob(
                                f"{CLUSTER_DIR_TEMPLATE}*/{CLUSTER_DEAD_FILE}")
                        )
                        if len(dead_clusters) == self.cm.num_of_instances:
                            raise RuntimeError(
                                "All clusters are dead, cannot run.")
                        continue

                    # singleton test is running, so no other test can be started
                    if (instance_dir / TEST_SINGLETON_FILE).exists():
                        self.cm._log(
                            f"c{instance_num}: singleton test in progress, cannot run"
                        )
                        sleep_delay = 5
                        continue

                    restart_in_progress = list(
                        instance_dir.glob(f"{RESTART_IN_PROGRESS_GLOB}_*"))
                    # cluster restart planned, no new tests can start
                    if not restart_here and restart_in_progress:
                        # no log message here, there would be too many of them
                        sleep_delay = 5
                        continue

                    started_tests = list(
                        instance_dir.glob(f"{TEST_RUNNING_GLOB}_*"))

                    # "marked tests" = group of tests marked with a specific mark.
                    # While these tests are running, no unmarked test can start.
                    marked_starting = list(
                        instance_dir.glob(f"{TEST_MARK_STARTING_GLOB}_*"))
                    marked_running = list(
                        instance_dir.glob(f"{TEST_CURR_MARK_GLOB}_*"))

                    if mark:
                        marked_running_my = (
                            instance_dir /
                            f"{TEST_CURR_MARK_GLOB}_{mark}").exists()
                        marked_starting_my = list(
                            instance_dir.glob(
                                f"{TEST_MARK_STARTING_GLOB}_{mark}_*"))

                        marked_running_my_anywhere = list(
                            self.cm.lock_dir.glob(
                                f"{CLUSTER_DIR_TEMPLATE}*/{TEST_CURR_MARK_GLOB}_{mark}"
                            ))
                        # check if tests with my mark are running on some other cluster instance
                        if not marked_running_my and marked_running_my_anywhere:
                            self.cm._log(
                                f"c{instance_num}: tests marked with my mark '{mark}' "
                                "already running on other cluster instance, cannot run"
                            )
                            continue

                        marked_starting_my_anywhere = list(
                            self.cm.lock_dir.glob(
                                f"{CLUSTER_DIR_TEMPLATE}*/{TEST_MARK_STARTING_GLOB}_{mark}_*"
                            ))
                        # check if tests with my mark are starting on some other cluster instance
                        if not marked_starting_my and marked_starting_my_anywhere:
                            self.cm._log(
                                f"c{instance_num}: tests marked with my mark '{mark}' starting "
                                "on other cluster instance, cannot run")
                            continue

                        # check if this test has the same mark as currently running marked tests
                        if marked_running_my or marked_starting_my:
                            # lock to this cluster instance
                            selected_instance = instance_num
                        elif marked_running or marked_starting:
                            self.cm._log(
                                f"c{instance_num}: tests marked with other mark starting "
                                f"or running, I have different mark '{mark}'")
                            continue

                        # check if we need to wait until marked tests can run
                        if marked_starting_my and started_tests:
                            self.cm._log(
                                f"c{instance_num}: unmarked tests running, wants to start '{mark}'"
                            )
                            sleep_delay = 2
                            continue

                    # no unmarked test can run while marked tests are starting or running
                    elif marked_running or marked_starting:
                        self.cm._log(
                            f"c{instance_num}: marked tests starting or running, "
                            f"I don't have mark")
                        sleep_delay = 5
                        continue

                    # is this the first marked test that wants to run?
                    initial_marked_test = bool(mark and not marked_running)

                    # indicate that it is planned to start marked tests as soon as
                    # all currently running tests are finished or the cluster is restarted
                    if initial_marked_test:
                        # lock to this cluster instance
                        selected_instance = instance_num
                        mark_starting_file = (
                            instance_dir /
                            f"{TEST_MARK_STARTING_GLOB}_{mark}_{self.cm.worker_id}"
                        )
                        if not mark_starting_file.exists():
                            open(
                                mark_starting_file,
                                "a",
                            ).close()
                        if started_tests:
                            self.cm._log(
                                f"c{instance_num}: unmarked tests running, wants to start '{mark}'"
                            )
                            sleep_delay = 3
                            continue

                    # get marked tests status
                    marked_tests_status = self._get_marked_tests_status(
                        cache=marked_tests_cache, instance_num=instance_num)

                    # marked tests are already running
                    if marked_running:
                        active_mark_file = marked_running[0].name

                        # update marked tests status
                        self._update_marked_tests(
                            marked_tests_status=marked_tests_status,
                            active_mark_name=active_mark_file,
                            started_tests=started_tests,
                            instance_num=instance_num,
                        )

                        self.cm._log(
                            f"c{instance_num}: in marked tests branch, "
                            f"I have required mark '{mark}'")

                    # reset counter of cycles with no marked test running
                    marked_tests_status.no_marked_tests_iter = 0

                    # this test is a singleton - no other test can run while this one is running
                    if singleton and started_tests:
                        self.cm._log(
                            f"c{instance_num}: tests are running, cannot start singleton"
                        )
                        sleep_delay = 5
                        continue

                    # this test wants to lock some resources, check if these are not
                    # locked or in use
                    if lock_resources:
                        res_usable = self._are_resources_usable(
                            resources=lock_resources,
                            instance_dir=instance_dir,
                            instance_num=instance_num,
                        )
                        if not res_usable:
                            sleep_delay = 5
                            continue

                    # filter out `lock_resources` from the list of `use_resources`
                    if use_resources and lock_resources:
                        use_resources = list(
                            set(use_resources) - set(lock_resources))

                    # this test wants to use some resources, check if these are not locked
                    if use_resources:
                        res_locked = self._are_resources_locked(
                            resources=use_resources,
                            instance_dir=instance_dir,
                            instance_num=instance_num,
                        )
                        if res_locked:
                            sleep_delay = 5
                            continue

                    # indicate that the cluster will be restarted
                    new_cmd_restart = bool(start_cmd and
                                           (initial_marked_test or singleton))
                    if not restart_here and (
                            new_cmd_restart
                            or self._is_restart_needed(instance_num)):
                        if started_tests:
                            self.cm._log(
                                f"c{instance_num}: tests are running, cannot restart"
                            )
                            continue

                        # Cluster restart will be performed by this worker.
                        # By setting `restart_here`, we make sure this worker continues on
                        # this cluster instance after restart. This is important because
                        # the `start_cmd` used for starting the cluster might be specific
                        # to the test.
                        restart_here = True
                        self.cm._log(
                            f"c{instance_num}: setting to restart cluster")
                        selected_instance = instance_num
                        restart_in_progress_file = (
                            instance_dir /
                            f"{RESTART_IN_PROGRESS_GLOB}_{self.cm.worker_id}")
                        if not restart_in_progress_file.exists():
                            open(restart_in_progress_file, "a").close()

                    # we've found a suitable cluster instance
                    selected_instance = instance_num
                    self.cm._cluster_instance_num = instance_num
                    cluster_nodes.set_cluster_env(instance_num)

                    if restart_here:
                        if restart_ready:
                            # The cluster was already restarted if we are here and
                            # `restart_ready` is still True.
                            restart_ready = False

                            # Remove status files that are no longer valid after restart.
                            for f in instance_dir.glob(
                                    f"{RESTART_IN_PROGRESS_GLOB}_*"):
                                os.remove(f)
                            for f in instance_dir.glob(
                                    f"{RESTART_NEEDED_GLOB}_*"):
                                os.remove(f)
                        else:
                            self.cm._log(f"c{instance_num}: calling restart")
                            # the actual `_restart` function will be called outside
                            # of global lock
                            restart_ready = True
                            continue

                    # from this point on, all conditions needed to start the test are met

                    # this test is a singleton
                    if singleton:
                        self.cm._log(f"c{instance_num}: starting singleton")
                        open(self.cm.instance_dir / TEST_SINGLETON_FILE,
                             "a").close()

                    # this test is the first marked test
                    if initial_marked_test:
                        self.cm._log(
                            f"c{instance_num}: starting '{mark}' tests")
                        open(
                            self.cm.instance_dir /
                            f"{TEST_CURR_MARK_GLOB}_{mark}", "a").close()
                        for sf in marked_starting:
                            os.remove(sf)

                    # create status file for each in-use resource
                    _ = [
                        open(
                            self.cm.instance_dir /
                            f"{RESOURCE_IN_USE_GLOB}_{r}_{self.cm.worker_id}",
                            "a",
                        ).close() for r in use_resources
                    ]

                    # create status file for each locked resource
                    _ = [
                        open(
                            self.cm.instance_dir /
                            f"{RESOURCE_LOCKED_GLOB}_{r}_{self.cm.worker_id}",
                            "a",
                        ).close() for r in lock_resources
                    ]

                    # cleanup = cluster restart after test (group of tests) is finished
                    if cleanup:
                        # cleanup after a group of tests that are marked with a marker
                        if mark:
                            self.cm._log(f"c{instance_num}: cleanup and mark")
                            open(
                                self.cm.instance_dir /
                                f"{RESTART_AFTER_MARK_GLOB}_{self.cm.worker_id}",
                                "a",
                            ).close()
                        # cleanup after a single test (e.g. singleton)
                        else:
                            self.cm._log(
                                f"c{instance_num}: cleanup and not mark")
                            open(
                                self.cm.instance_dir /
                                f"{RESTART_NEEDED_GLOB}_{self.cm.worker_id}",
                                "a",
                            ).close()

                    break
                else:
                    # if the test cannot start on any instance, return to top-level loop
                    continue

                test_running_file = (
                    self.cm.instance_dir /
                    f"{TEST_RUNNING_GLOB}_{self.cm.worker_id}")
                self.cm._log(
                    f"c{self.cm.cluster_instance_num}: creating {test_running_file}"
                )
                open(test_running_file, "a").close()

                # check if it is necessary to reload data
                state_dir = cluster_nodes.get_cluster_env().state_dir
                self._reload_cluster_obj(state_dir=state_dir)

                cluster_obj = self.cm.cache.cluster_obj
                if not cluster_obj:
                    cluster_obj = cluster_nodes.get_cluster_type(
                    ).get_cluster_obj()

                # `cluster_obj` is ready, we can start the test
                break

        return cluster_obj
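
A hypothetical test-side call of `get()`: run a test that belongs to the "long" mark group, keep the "pool3" resource locked while it runs, and restart the cluster afterwards (`cluster_manager` and both names are placeholders):

cluster = cluster_manager.get(
    mark="long",
    lock_resources=["pool3"],
    cleanup=True,
)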
Example #14
def fund_from_genesis(
    *dst_addrs: str,
    cluster_obj: clusterlib.ClusterLib,
    amount: int = 2_000_000,
    tx_name: Optional[str] = None,
    destination_dir: FileType = ".",
) -> None:
    """Send `amount` from genesis addr to all `dst_addrs`."""
    fund_dst = [
        clusterlib.TxOut(address=d, amount=amount)
        for d in dst_addrs
        if cluster_obj.get_address_balance(d) < amount
    ]
    if not fund_dst:
        return

    with helpers.FileLockIfXdist(f"{helpers.get_basetemp()}/{cluster_obj.genesis_utxo_addr}.lock"):
        tx_name = tx_name or helpers.get_timestamped_rand_str()
        tx_name = f"{tx_name}_genesis_funding"
        fund_tx_files = clusterlib.TxFiles(
            signing_key_files=[
                *cluster_obj.genesis_keys.delegate_skeys,
                cluster_obj.genesis_keys.genesis_utxo_skey,
            ]
        )

        cluster_obj.send_funds(
            src_address=cluster_obj.genesis_utxo_addr,
            destinations=fund_dst,
            tx_name=tx_name,
            tx_files=fund_tx_files,
            destination_dir=destination_dir,
        )
        cluster_obj.wait_for_new_block(new_blocks=2)
Example #15
def fund_from_genesis(
    *dst_addrs: str,
    cluster_obj: clusterlib.ClusterLib,
    amount: int = 2_000_000,
    tx_name: Optional[str] = None,
    destination_dir: FileType = ".",
) -> None:
    """Send `amount` from genesis addr to all `dst_addrs`."""
    fund_dst = [
        clusterlib.TxOut(address=d, amount=amount) for d in dst_addrs
        if cluster_obj.get_address_balance(d) < amount
    ]
    if not fund_dst:
        return

    with helpers.FileLockIfXdist(
            f"{helpers.TEST_TEMP_DIR}/{cluster_obj.genesis_utxo_addr}.lock"):
        tx_name = tx_name or get_timestamped_rand_str()
        tx_name = f"{tx_name}_genesis_funding"
        fund_tx_files = clusterlib.TxFiles(signing_key_files=[
            *cluster_obj.delegate_skeys, cluster_obj.genesis_utxo_skey
        ])

        cluster_obj.send_funds(
            src_address=cluster_obj.genesis_utxo_addr,
            destinations=fund_dst,
            tx_name=tx_name,
            tx_files=fund_tx_files,
            destination_dir=destination_dir,
        )
        cluster_obj.wait_for_new_block(new_blocks=2)
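
A hypothetical call site for `fund_from_genesis`: top up two freshly generated addresses before a test spends from them (`addr1`, `addr2` and `cluster` are placeholders):

fund_from_genesis(
    addr1.address,
    addr2.address,
    cluster_obj=cluster,
    amount=10_000_000,
    tx_name="test_funding",
)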
Example #16
    def get(  # noqa: C901
        self,
        singleton: bool = False,
        mark: str = "",
        lock_resources: UnpackableSequence = (),
        use_resources: UnpackableSequence = (),
        cleanup: bool = False,
        start_cmd: str = "",
    ) -> clusterlib.ClusterLib:
        """Return the `clusterlib.ClusterLib` instance once we can start the test.

        It checks current conditions and waits if the conditions don't allow the test
        to start right away.
        """
        # pylint: disable=too-many-statements,too-many-branches,too-many-locals
        selected_instance = -1
        restart_here = False
        restart_ready = False
        mark_start_here = False
        first_iteration = True
        sleep_delay = 1
        marked_tests_cache: Dict[int, MarkedTestsStatus] = {}

        if start_cmd:
            if not (singleton or mark):
                raise AssertionError(
                    "Custom start command can be used only together with `singleton` or `mark`"
                )
            # always clean up after test(s) that started the cluster with a custom configuration
            cleanup = True

        # iterate until it is possible to start the test
        while True:
            if restart_ready:
                self._restart(start_cmd=start_cmd)

            if not first_iteration:
                helpers.xdist_sleep(random.random() * sleep_delay)

            # nothing time-consuming can go under this lock, as it would block all other workers
            with helpers.FileLockIfXdist(self.cluster_lock):
                test_on_worker = list(
                    self.lock_dir.glob(
                        f"{CLUSTER_DIR_TEMPLATE}*/{TEST_RUNNING_GLOB}_{self.worker_id}"
                    ))

                # test is already running, nothing to set up
                if (first_iteration and test_on_worker
                        and self._cluster_instance != -1
                        and self.cache.cluster_obj):
                    self._log(f"{test_on_worker[0]} already exists")
                    return self.cache.cluster_obj

                first_iteration = False  # needs to be set here, before the first `continue`
                self._cluster_instance = -1

                # try all existing cluster instances
                for instance_num in range(self.num_of_instances):
                    # if instance to run the test on was already decided, skip all other instances
                    # pylint: disable=consider-using-in
                    if selected_instance != -1 and instance_num != selected_instance:
                        continue

                    instance_dir = self.lock_dir / f"{CLUSTER_DIR_TEMPLATE}{instance_num}"
                    instance_dir.mkdir(exist_ok=True)

                    # singleton test is running, so no other test can be started
                    if (instance_dir / TEST_SINGLETON_FILE).exists():
                        self._log(
                            f"c{instance_num}: singleton test in progress, cannot run"
                        )
                        sleep_delay = 5
                        continue

                    restart_in_progress = list(
                        instance_dir.glob(f"{RESTART_IN_PROGRESS_GLOB}_*"))
                    # cluster restart planned, no new tests can start
                    if not restart_here and restart_in_progress:
                        self._log(
                            f"c{instance_num}: restart in progress, cannot run"
                        )
                        continue

                    started_tests = list(
                        instance_dir.glob(f"{TEST_RUNNING_GLOB}_*"))

                    # "marked tests" = group of tests marked with a specific mark.
                    # While these tests are running, no unmarked test can start.
                    # Check if it is indicated that marked tests will start next.
                    marked_tests_starting = list(
                        instance_dir.glob(f"{TEST_MARK_STARTING_GLOB}_*"))
                    marked_tests_starting_my = list(
                        instance_dir.glob(
                            f"{TEST_MARK_STARTING_GLOB}_{mark}_*"))
                    if not mark_start_here and marked_tests_starting_my:
                        self._log(
                            f"c{instance_num}: marked tests starting with my mark, cannot run"
                        )
                        selected_instance = instance_num
                        sleep_delay = 2
                        continue
                    if not mark_start_here and marked_tests_starting:
                        self._log(
                            f"c{instance_num}: marked tests starting, cannot run"
                        )
                        sleep_delay = 2
                        continue
                    if mark_start_here and marked_tests_starting:
                        if started_tests:
                            self._log(
                                f"c{instance_num}: unmarked tests running, cannot start marked test"
                            )
                            sleep_delay = 2
                            continue
                        os.remove(marked_tests_starting[0])
                        mark_start_here = False

                    test_curr_mark = list(
                        instance_dir.glob(f"{TEST_CURR_MARK_GLOB}_*"))
                    first_marked_test = bool(mark and not test_curr_mark)

                    # indicate that it is planned to start marked tests as soon as
                    # all currently running tests are finished
                    if first_marked_test and started_tests:
                        self._log(
                            f"c{instance_num}: unmarked tests running, wants to start '{mark}'"
                        )
                        mark_start_here = True
                        selected_instance = instance_num
                        open(
                            instance_dir /
                            f"{TEST_MARK_STARTING_GLOB}_{mark}_{self.worker_id}",
                            "a").close()
                        sleep_delay = 2
                        continue

                    # get marked tests status
                    marked_tests_status = self._get_marked_tests_status(
                        cache=marked_tests_cache, instance_num=instance_num)

                    # marked tests are already running
                    if test_curr_mark:
                        active_mark_file = test_curr_mark[0].name

                        self._update_marked_tests(
                            marked_tests_status=marked_tests_status,
                            active_mark_name=active_mark_file,
                            started_tests=started_tests,
                            instance_num=instance_num,
                        )

                        if not mark:
                            self._log(
                                f"c{instance_num}: marked tests running, I don't have mark"
                            )
                            sleep_delay = 5
                            continue

                        # check if this test has the same mark as currently running marked tests,
                        # so it can run
                        if f"{TEST_CURR_MARK_GLOB}_{mark}" not in active_mark_file:
                            self._log(
                                f"c{instance_num}: marked tests running, "
                                f"I have different mark - {mark}")
                            sleep_delay = 5
                            continue

                        self._log(f"c{instance_num}: in marked tests branch, "
                                  f"I have required mark '{mark}'")

                    # reset counter of cycles with no marked test running
                    marked_tests_status.no_marked_tests_iter = 0

                    # this test is a singleton - no other test can run while this one is running
                    if singleton and started_tests:
                        self._log(
                            f"c{instance_num}: tests are running, cannot start singleton"
                        )
                        sleep_delay = 5
                        continue

                    # this test wants to lock some resources, check if these are not
                    # locked or in use
                    if lock_resources:
                        res_usable = self._are_resources_usable(
                            resources=lock_resources,
                            instance_dir=instance_dir,
                            instance_num=instance_num,
                        )
                        if not res_usable:
                            sleep_delay = 5
                            continue

                    # filter out `lock_resources` from the list of `use_resources`
                    if use_resources and lock_resources:
                        use_resources = list(
                            set(use_resources) - set(lock_resources))

                    # this test wants to use some resources, check if these are not locked
                    if use_resources:
                        res_locked = self._are_resources_locked(
                            resources=use_resources,
                            instance_dir=instance_dir,
                            instance_num=instance_num,
                        )
                        if res_locked:
                            sleep_delay = 5
                            continue

                    # indicate that the cluster will be restarted
                    new_cmd_restart = bool(start_cmd and
                                           (first_marked_test or singleton))
                    if not restart_here and (
                            new_cmd_restart
                            or self._is_restart_needed(instance_num)):
                        self._log(
                            f"c{instance_num}: setting to restart cluster")
                        restart_here = True
                        selected_instance = instance_num
                        open(
                            instance_dir /
                            f"{RESTART_IN_PROGRESS_GLOB}_{self.worker_id}",
                            "a").close()

                    # cluster restart will be performed by this worker
                    if restart_here and started_tests:
                        self._log(
                            f"c{instance_num}: tests are running, cannot restart"
                        )
                        sleep_delay = 2
                        continue

                    # we've found a suitable cluster instance
                    self._cluster_instance = instance_num
                    cluster_instances.set_cardano_node_socket_path(
                        instance_num)

                    if restart_here:
                        if restart_ready:
                            # The cluster was already restarted if we are here and
                            # `restart_ready` is still True.
                            restart_ready = False

                            # Remove status files that are no longer valid after restart.
                            for f in instance_dir.glob(
                                    f"{RESTART_IN_PROGRESS_GLOB}_*"):
                                os.remove(f)
                            for f in instance_dir.glob(
                                    f"{RESTART_NEEDED_GLOB}_*"):
                                os.remove(f)
                        else:
                            self._log(f"c{instance_num}: calling restart")
                            # the actual `_restart` function will be called outside
                            # of global lock
                            restart_ready = True
                            continue

                    # from this point on, all conditions needed to start the test are met

                    # this test is a singleton
                    if singleton:
                        self._log(f"c{instance_num}: starting singleton")
                        open(self.instance_dir / TEST_SINGLETON_FILE,
                             "a").close()

                    # this test is the first marked test
                    if first_marked_test:
                        self._log(f"c{instance_num}: starting '{mark}' tests")
                        open(
                            self.instance_dir /
                            f"{TEST_CURR_MARK_GLOB}_{mark}", "a").close()

                    # create status file for each in-use resource
                    _ = [
                        open(
                            self.instance_dir /
                            f"{RESOURCE_IN_USE_GLOB}_{r}_{self.worker_id}",
                            "a").close() for r in use_resources
                    ]

                    # create status file for each locked resource
                    _ = [
                        open(
                            self.instance_dir /
                            f"{RESOURCE_LOCKED_GLOB}_{r}_{self.worker_id}",
                            "a").close() for r in lock_resources
                    ]

                    # cleanup = cluster restart after test (group of tests) is finished
                    if cleanup:
                        # cleanup after a group of tests that are marked with a marker
                        if mark:
                            self._log(f"c{instance_num}: cleanup and mark")
                            open(
                                self.instance_dir /
                                f"{RESTART_AFTER_MARK_GLOB}_{self.worker_id}",
                                "a",
                            ).close()
                        # cleanup after a single test (e.g. singleton)
                        else:
                            self._log(f"c{instance_num}: cleanup and not mark")
                            open(
                                self.instance_dir /
                                f"{RESTART_NEEDED_GLOB}_{self.worker_id}",
                                "a").close()

                    break
                else:
                    # if the test cannot run on any instance, return to top-level loop
                    continue

                test_running_file = self.instance_dir / f"{TEST_RUNNING_GLOB}_{self.worker_id}"
                self._log(
                    f"c{self.cluster_instance}: creating {test_running_file}")
                open(test_running_file, "a").close()

                cluster_env = devops_cluster.get_cluster_env()
                state_dir = Path(cluster_env["state_dir"])

                # check if it is necessary to reload data
                self._reload_cluster_obj(state_dir=state_dir)

                cluster_obj = self.cache.cluster_obj
                if not cluster_obj:
                    cluster_obj = devops_cluster.get_cluster_obj()

                # `cluster_obj` is ready, we can start the test
                break

        return cluster_obj