Beispiel #1
0
def remove_user(username: str = "demo_user",
                *,
                remove_home: bool = True,
                get_sudo: bool = True) -> None:
    """Remove a system account with ``userdel`` when ``id -u`` reports a non-zero uid.

    Args:
        username: Login name of the account to remove.
        remove_home: When true, ``-r`` is passed to ``userdel``.
        get_sudo: When true, prompt for the invoking user's sudo password and
            run ``userdel`` inside the ``sh.contrib.sudo`` context.

    ``ValueError`` and ``sh.ErrorReturnCode_1`` raised anywhere in the body are
    swallowed (presumably ``id`` exits with 1 for an unknown user -- confirm).
    """
    import getpass
    import sh

    try:
        uid_output = sh.id(["-u", username])
        if not int(uid_output):
            # uid 0 (or an empty-ish result) is never touched.
            return

        print(f"User {username} exists with id {uid_output}")

        sudo_factory = sh.contrib.sudo
        sudo_password = None
        if get_sudo:
            sudo_password = getpass.getpass(
                prompt=f"[sudo] password for {getpass.getuser()}: ")

        with ContextWrapper(
                sudo_factory,
                construction_kwargs=dict(password=sudo_password, _with=True),
                enabled=get_sudo,
        ):
            userdel_args = [f"{username}"]
            if remove_home:
                userdel_args = ["-r"] + userdel_args
            sh.userdel(userdel_args)
            print(f"Removed user {username}")
    except (ValueError, sh.ErrorReturnCode_1):
        pass
Beispiel #2
0
def status_service(service_name: str,
                   *,
                   get_sudo: bool = False,
                   run_as: RunAsEnum = RunAsEnum.user) -> None:
    """Run ``systemctl status`` for the project's unit of the given service.

    Args:
        service_name: Short service name; the unit queried is
            ``{PROJECT_NAME}_service_{service_name}.service``.
        get_sudo: When true, prompt for the sudo password and run inside the
            ``sh.contrib.sudo`` context.
        run_as: When equal to ``RunAsEnum.user``, ``--user`` is passed to
            ``systemctl``.

    An ``sh.ErrorReturnCode_3`` is caught and printed together with its
    captured stdout instead of propagating.
    """
    project_service_name = f"{PROJECT_NAME}_service_{service_name}"
    print(f"Status for {project_service_name}")
    try:
        sudo_factory = sh.contrib.sudo
        sudo_password = None
        if get_sudo:
            sudo_password = getpass.getpass(
                prompt=f"[sudo] password for {getpass.getuser()}: ")

        with ContextWrapper(
                sudo_factory,
                construction_kwargs=dict(password=sudo_password, _with=True),
                enabled=get_sudo,
        ):
            scope_flags = ["--user"] if run_as == RunAsEnum.user else []
            unit_name = f"{project_service_name}.service"
            sh.systemctl(scope_flags + ["status", unit_name])
    except sh.ErrorReturnCode_3 as status_error:
        print(status_error, status_error.stdout)
Beispiel #3
0
def enable_service(service_name: str,
                   *,
                   get_sudo: bool = False,
                   run_as: RunAsEnum = RunAsEnum.user) -> None:
    """Run ``systemctl enable`` for the project's unit, then start it.

    Args:
        service_name: Short service name; the unit enabled is
            ``{PROJECT_NAME}_service_{service_name}.service``.
        get_sudo: When true, prompt for the sudo password and run inside the
            ``sh.contrib.sudo`` context.
        run_as: When equal to ``RunAsEnum.user``, ``--user`` is passed to
            ``systemctl``; the value is also forwarded to ``start_service``.
    """
    project_service_name = f"{PROJECT_NAME}_service_{service_name}"
    print(f"Enabling {project_service_name}")

    sudo_factory = sh.contrib.sudo
    sudo_password = None
    if get_sudo:
        sudo_password = getpass.getpass(
            prompt=f"[sudo] password for {getpass.getuser()}: ")

    with ContextWrapper(
            sudo_factory,
            construction_kwargs=dict(password=sudo_password, _with=True),
            enabled=get_sudo,
    ):
        scope_flags = ["--user"] if run_as == RunAsEnum.user else []
        unit_name = f"{project_service_name}.service"
        sh.systemctl(scope_flags + ["enable", unit_name])
        # Started inside the same context, so no second sudo prompt is requested.
        start_service(service_name, get_sudo=False, run_as=run_as)
Beispiel #4
0
def make_user(
    username: str = "demo_user",
    password: str = None,
    *,
    add_home: bool = True,
    home_dir: Path = None,
    allow_existing_user: bool = True,
    get_sudo: bool = True,
) -> None:
    """Create a system account with ``useradd`` when the ``id`` lookup fails.

    Args:
        username: Login name of the account.
        password: Plain-text password for the new account. When falsy, the
            password is requested interactively without echo.
        add_home: When true, ``-m -d <home>`` is passed to ``useradd``.
        home_dir: Home directory to use; defaults to ``/home/<username>``.
        allow_existing_user: When false and ``id -u`` reports a non-zero uid,
            ``FileExistsError`` is raised.
        get_sudo: When true, prompt for the invoking user's sudo password and
            run ``useradd`` inside the ``sh.contrib.sudo`` context.

    Raises:
        FileExistsError: The account exists and ``allow_existing_user`` is
            false.

    ``useradd`` is only invoked when the lookup raised ``ValueError`` or
    ``sh.ErrorReturnCode_1``. An ``sh.ErrorReturnCode_9`` from ``useradd`` is
    ignored (presumably "username already in use" -- confirm against
    useradd(8)).
    """
    # NOTE: ``crypt`` is deprecated since Python 3.11 and removed in 3.13.
    import crypt
    import getpass
    import sh

    useradd_args = []
    if add_home:
        useradd_args += [
            "-m", "-d",
            str(home_dir) if home_dir else f"/home/{username}",
        ]

    user_missing = False
    try:
        user_id = sh.id(["-u", username])
        if int(user_id):
            if not allow_existing_user:
                raise FileExistsError(f"user {username} already exists")
            group_id = sh.id(["-g", username])
            print(f"user {username} exists with id {user_id} and {group_id}")
    except (ValueError, sh.ErrorReturnCode_1):
        user_missing = True

    if not user_missing:
        return

    with ContextWrapper(
            sh.contrib.sudo,
            construction_kwargs=dict(
                password=getpass.getpass(
                    prompt=f"[sudo] password for {getpass.getuser()}: ")
                if get_sudo else None,
                _with=True,
            ),
            enabled=get_sudo,
    ):
        # Read the new password without echoing it to the terminal.
        plain_password = password if password else getpass.getpass(
            prompt=f"new password for user {username}: ")
        # A random SHA-512 salt replaces the fixed two-character salt, which
        # selected traditional DES crypt (8 significant characters, shared
        # salt across every account).
        hashed_password = crypt.crypt(plain_password,
                                      crypt.mksalt(crypt.METHOD_SHA512))
        try:
            sh.useradd(useradd_args + ["-p", hashed_password, f"{username}"])
        except sh.ErrorReturnCode_9:
            pass
Beispiel #5
0
def remove_service(
    service_name: str,
    *,
    remove_app_user: bool = True,
    get_sudo: bool = False,
    run_as: RunAsEnum = RunAsEnum.user,
) -> None:
    """Disable a project service, delete its unit file and reload systemd.

    Args:
        service_name: Short service name; the unit handled is
            ``{PROJECT_NAME}_service_{service_name}``.
        remove_app_user: When true and ``run_as`` is ``RunAsEnum.app_user``,
            the ``<service_name>_user`` account is removed as well.
        get_sudo: When true, prompt for the sudo password and run every step
            inside the ``sh.contrib.sudo`` context.
        run_as: Forwarded to ``disable_service`` and ``target_service_path``.

    An ``sh.ErrorReturnCode_1`` raised by any step is swallowed.
    """
    try:
        with ContextWrapper(
                sh.contrib.sudo,
                construction_kwargs=dict(
                    password=(getpass.getpass(
                        prompt=f"[sudo] password for {getpass.getuser()}: ")
                              if get_sudo else None),
                    _with=True,
                ),
                enabled=get_sudo,
        ):
            # Nested helpers run with get_sudo=False so the password is only
            # requested once, by this function.
            disable_service(service_name, get_sudo=False, run_as=run_as)
            project_service_name = f"{PROJECT_NAME}_service_{service_name}"
            target_service_file_path = target_service_path(
                project_service_name, run_as=run_as)
            print(f"Removing {target_service_file_path}")

            sh.rm(target_service_file_path)
            # NOTE(review): no ``--user`` flag is passed here even when
            # run_as is RunAsEnum.user -- confirm this is intended.
            sh.systemctl("daemon-reload")

            if run_as == RunAsEnum.app_user and remove_app_user:
                # remove_user() takes no ``run_as`` keyword; passing one
                # raised TypeError before the account could be cleaned up.
                remove_user(service_name + "_user", get_sudo=False)
    except sh.ErrorReturnCode_1:
        pass
Beispiel #6
0
def predictor_response_train_model(
        model,
        *,
        train_iterator,
        criterion,
        optimiser,
        scheduler,
        writer,
        interrupted_path,
        val_data_iterator=None,
        num_updates: int = 250000,
        device=global_torch_device(),
        early_stop=None,
        debug=False,
):
    """Train ``model`` for up to ``num_updates`` updates and keep the best weights.

    Each update draws one batch from ``train_iterator`` for an optimisation
    step and, when ``val_data_iterator`` is given, one batch from it for
    validation. Inputs are repeated three times along dimension 1 before
    being fed to the model (presumably single-channel images expanded to
    three channels -- confirm against the data pipeline).

    :param model: module to train; updated in place and returned
    :param train_iterator: iterator yielding ``(input, label)`` batches
    :param criterion: loss callable applied as ``criterion(pred, label)``
    :param optimiser: optimiser stepped once per update
    :param scheduler: optional scheduler stepped once per update
    :param writer: object exposing ``scalar(tag, value, step)``
    :param interrupted_path: where the state dict is saved whenever a new
        best validation loss is reached
    :param val_data_iterator: optional iterator yielding validation batches
    :param num_updates: maximum number of updates
    :param device: device the batches are moved to
    :param early_stop: optional validation-loss threshold; training stops as
        soon as the validation loss drops below it
    :param debug: when true, run under ``torch.autograd.detect_anomaly``
    :return: ``model`` with the best recorded weights loaded
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = 1e10
    since = time.time()

    try:
        sess = tqdm(range(num_updates), leave=False, disable=False)
        val_loss = 0
        update_loss = 0
        val_acc = 0
        stop_early = False
        with ContextWrapper(torch.autograd.detect_anomaly, enabled=debug):
            for update_i in sess:
                for phase in [SplitEnum.training, SplitEnum.validation]:
                    if phase == SplitEnum.training:
                        with TorchTrainSession(model):

                            batch_input, true_label = next(train_iterator)

                            rgb_imgs = to_tensor(batch_input,
                                                 dtype=torch.float,
                                                 device=device).repeat(
                                                     1, 3, 1, 1)
                            true_label = to_tensor(true_label,
                                                   dtype=torch.long,
                                                   device=device)
                            optimiser.zero_grad()

                            pred = model(rgb_imgs)
                            loss = criterion(pred, true_label)
                            loss.backward()
                            optimiser.step()

                            update_loss = loss.data.cpu().numpy()
                            writer.scalar(f"loss/train", update_loss, update_i)

                            if scheduler:
                                scheduler.step()
                    elif val_data_iterator:
                        with TorchEvalSession(model):
                            test_rgb_imgs, test_true_label = next(
                                val_data_iterator)

                            test_rgb_imgs = to_tensor(test_rgb_imgs,
                                                      dtype=torch.float,
                                                      device=device).repeat(
                                                          1, 3, 1, 1)
                            test_true_label = to_tensor(test_true_label,
                                                        dtype=torch.long,
                                                        device=device)

                            with torch.no_grad():
                                val_pred = model(test_rgb_imgs)
                                val_loss = criterion(val_pred, test_true_label)

                            _, cat = torch.max(val_pred, -1)
                            val_acc = torch.sum(
                                cat == test_true_label) / float(cat.size(0))
                            writer.scalar(f"loss/acc", val_acc, update_i)
                            writer.scalar(f"loss/val", val_loss, update_i)

                            if val_loss < best_val_loss:
                                best_val_loss = val_loss

                                best_model_wts = copy.deepcopy(
                                    model.state_dict())
                                sess.write(
                                    f"New best validation model at update {update_i} with best_val_loss {best_val_loss}"
                                )
                                torch.save(model.state_dict(),
                                           interrupted_path)

                        # Compare the scalar validation loss (not the
                        # prediction tensor) against the threshold.
                        if early_stop is not None and val_loss < early_stop:
                            stop_early = True
                            break
                sess.set_description_str(f"Update {update_i} - {phase} "
                                         f"update_loss:{update_loss:2f} "
                                         f"val_loss:{val_loss} "
                                         f"val_acc:{val_acc}")
                if stop_early:
                    # The break above only leaves the phase loop; leave the
                    # update loop as well so training really stops.
                    break

    except KeyboardInterrupt:
        # A manual interrupt still falls through to restoring the best weights.
        print("Interrupt")

    model.load_state_dict(best_model_wts)  # load best model weights

    time_elapsed = time.time() - since
    print(f"{time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val loss: {best_val_loss:3f}")

    return model
Beispiel #7
0
def loss_grad_check(
    model: torch.nn.Module,
    loss_fn: callable,
    input: torch.Tensor,
    target: torch.Tensor,
    epsilon: float = 1e-6,
    error_tolerance: float = 1e-5,
) -> None:
    """
    Compare analytical gradients with a two-sided numerical approximation.

    DOES not work, please refer to torch/autograd/gradcheck.py

    The analytical gradients come from one backward pass of
    ``loss_fn(model(input), target)`` on ``model``. The numerical ones are
    computed on a deep copy of the model by perturbing single entries
    ``[i][j]`` of each trainable parameter by ``+epsilon`` and ``-epsilon``
    and taking ``(loss_p - loss_n) / (2 * epsilon)``. Entries whose relative
    deviance exceeds ``error_tolerance`` fail an assertion.

    :param model: model whose ``.grad`` values are checked
    :type model: torch.nn.Module
    :param loss_fn: callable applied as ``loss_fn(prediction, target)``
    :type loss_fn: callable
    :param input: model input used for every forward pass
    :type input: torch.Tensor
    :param target: target passed to ``loss_fn``
    :type target: torch.Tensor
    :param epsilon: perturbation size, must be positive
    :type epsilon: float
    :param error_tolerance: largest accepted relative deviance
    :type error_tolerance: float
    :return: nothing; an AssertionError signals a mismatch
    :rtype: None"""
    assert epsilon > 0.0
    # Perturbations are applied to a copy so ``model`` keeps its weights.
    c_model = copy.deepcopy(model)

    # One backward pass on the original model populates ``p.grad``.
    loss = loss_fn(model(input), target)
    loss.backward()
    compute_gradients = False
    # The second argument is True here, presumably enabling torch.no_grad for
    # the perturbation passes -- confirm against ContextWrapper's signature.
    with ContextWrapper(torch.no_grad, not compute_gradients):
        # NOTE(review): the session wraps ``model`` while the forward passes
        # below call ``c_model`` -- confirm which one should be wrapped.
        with TorchEvalSession(model):
            # Presumably both helpers yield parameters in the same order, so
            # copy and original are paired by position -- TODO confirm.
            for (n, c_p), p in zip(
                    named_trainable_parameters(c_model).items(),
                    trainable_parameters(model)):
                for i, c_p_o in enumerate(c_p):
                    a = c_p_o.size()
                    # Slices with an empty size (scalar entries) are skipped.
                    if len(a) > 0:
                        for j in range(a[0]):
                            cp_orig = c_p.data.clone()

                            c_p[i][j] += epsilon  # positive
                            loss_p = loss_fn(c_model(input.clone()),
                                             target.clone()).clone()

                            # NOTE(review): after this assignment cp_orig
                            # appears to share storage with c_p, so the
                            # in-place subtraction below may also change
                            # cp_orig and make the second restore a no-op --
                            # confirm.
                            c_p.data = cp_orig

                            c_p[i][j] -= epsilon  # negative
                            loss_n = loss_fn(c_model(input.clone()),
                                             target.clone()).clone()

                            c_p.data = cp_orig

                            if (
                                    True
                            ):  # TODO: make check based on the entire set of parameters at once
                                # Central difference of the two perturbed losses.
                                grad_approx = (loss_p - loss_n) / (2 * epsilon)

                                # |approx| + |analytical|, used to normalise
                                # the absolute difference below.
                                denom = math.sqrt(grad_approx**2) + math.sqrt(
                                    p.grad[i][j]**2)
                                if denom > 0:
                                    deviance = (math.sqrt(
                                        (grad_approx - p.grad[i][j])**2) /
                                                denom)
                                    # Disabled sign check:
                                    # assert torch.sign(grad_approx) == torch.sign(p.grad[i][j]), f'apprx: {grad_approx}, analytical {p.grad[i][j]}'
                                    assert (
                                        deviance <= error_tolerance
                                    ), f"Numerical gradient approximation of parameter {n} deviates larger than tolerance {error_tolerance}, deviance: {deviance}, approx:{grad_approx, loss_p, loss_n}, p.grad[i][j]:{p.grad[i][j]}"
                                else:
                                    # Both gradients are zero: nothing to compare.
                                    pass
Beispiel #8
0
def install_service(
    service_entry_point_path: Path,
    service_name: str,
    *,
    description: str = None,
    auto_enable: bool = True,
    run_as: RunAsEnum = RunAsEnum.user,
    # get_sudo: bool = False,
    restart: RestartServiceEnum = RestartServiceEnum.on_failure,
) -> None:
    """Write a systemd unit file for a Python entry point and optionally enable it.

    Args:
        service_entry_point_path: Existing ``.py`` file the service executes.
        service_name: Short service name; the unit is named
            ``{PROJECT_NAME}_service_{service_name}``.
        description: Unit description; a default mentioning ``service_name``
            is used when falsy.
        auto_enable: When true, ``enable_service`` is called after the unit
            file has been written.
        run_as: Selects the account the service runs as. Any value other than
            ``RunAsEnum.user`` makes the function prompt for the sudo password
            and run inside the ``sh.contrib.sudo`` context.
        restart: Restart policy; its ``value`` is substituted into
            ``SERVICE_TEMPLATE``.

    Raises:
        AssertionError: The entry point is not an existing ``.py`` file, or
            the service user's primary group name differs from the expected
            group.
        ValueError: ``run_as`` is not one of the handled enum members.
    """
    assert (service_entry_point_path.is_file()
            and service_entry_point_path.name.endswith(".py"))
    project_service_name = f"{PROJECT_NAME}_service_{service_name}"
    user = getpass.getuser()

    systemd_service_file_path = target_service_path(project_service_name,
                                                    run_as=run_as)
    print(f"Installing {systemd_service_file_path}")
    get_sudo = run_as != RunAsEnum.user
    with ContextWrapper(
            sh.contrib.sudo,
            construction_kwargs=dict(
                password=(getpass.getpass(
                    prompt=f"[sudo] password for {user}: ")
                          if get_sudo else None),
                _with=True,
            ),
            enabled=get_sudo,
    ):
        if run_as == RunAsEnum.app_user:
            service_user = service_name + "_user"
            make_user(service_user, get_sudo=False)
            service_target = "default.target"
            service_group = service_user
        elif run_as == RunAsEnum.root:
            # The account name had been masked to "******", which `id` cannot
            # resolve; the root branch needs the real account name.
            service_user = "root"
            service_target = "multi-user.target"
            service_group = service_user
        elif run_as == RunAsEnum.user:
            service_user = user
            service_target = "default.target"
            service_group = service_user
        else:
            raise ValueError

        sh.touch(systemd_service_file_path)
        group_name = str(sh.id(["-g", "-n", service_user])).strip("\n")
        assert service_group == group_name
        current_owner = sh.ls("-l", systemd_service_file_path).split(" ")[2]
        if current_owner != service_user:  # SETTING UP PERMISSIONS
            print(
                f"Changing owner of service file from {current_owner} to {service_user}"
            )
            if run_as == RunAsEnum.root:
                group_name = ""
            else:
                print(f"with common group {group_name}")
                # group_id = sh.id(["-g", service_user])
                sh.usermod(["-a", "-G", group_name,
                            user])  # TODO: Polluting groups of user

        # If a colon but no group name follows the user name, that user is
        # made the owner of the files and the group of the files is changed
        # to that user's login group.
        sh.chown([f"{user}:{group_name}", service_entry_point_path])
        # The invoking user owns the unit file while it is being written.
        sh.chown([f"{user}:{group_name}", systemd_service_file_path])

        print("writing service file")
        if not description:
            description = f"heimdallr service for {service_name}"
        with open(systemd_service_file_path, "w") as f:
            # The rendered template is a single string, so write it in one
            # call rather than iterating it character by character.
            f.write(
                SERVICE_TEMPLATE.format(
                    service_name=project_service_name,
                    service_user=service_user,
                    executable=sys.executable,
                    description=description,
                    service_entry_point_path=service_entry_point_path,
                    service_target=service_target,
                    service_group=service_group,
                    restart=restart.value,
                ))
        # Hand the finished unit file over to the service account.
        sh.chown([f"{service_user}:{group_name}", systemd_service_file_path])
        sh.chmod(["664", systemd_service_file_path])
        sh.chmod(["774", service_entry_point_path])
        sh.systemctl("daemon-reload")  # TODO: Requires sudo?

        if auto_enable:
            enable_service(service_name, get_sudo=False, run_as=run_as)