def remove_user(
    username: str = "demo_user", *, remove_home: bool = True, get_sudo: bool = True
) -> None:
    """Delete a system account with ``userdel`` when ``id -u`` reports a non-zero uid.

    Nothing is deleted when ``id -u`` prints ``0``. A non-numeric ``id`` output
    (``ValueError``) or an exit status of 1 from any of the invoked commands is
    silently ignored.

    :param username: Login name of the account to delete.
    :param remove_home: Pass ``-r`` to ``userdel`` when true.
    :param get_sudo: Prompt for the sudo password and run ``userdel`` inside the
        ``sh.contrib.sudo`` context (enabled through ``ContextWrapper``).
    """
    import sh
    import getpass

    try:
        uid = sh.id(["-u", username])
        if not int(uid):
            return

        print(f"User {username} exists with id {uid}")

        if get_sudo:
            sudo_password = getpass.getpass(
                prompt=f"[sudo] password for {getpass.getuser()}: "
            )
        else:
            sudo_password = None

        userdel_args = [f"{username}"]
        if remove_home:
            userdel_args = ["-r"] + userdel_args

        with ContextWrapper(
            sh.contrib.sudo,
            construction_kwargs=dict(password=sudo_password, _with=True),
            enabled=get_sudo,
        ):
            sh.userdel(userdel_args)

        print(f"Removed user {username}")
    except (ValueError, sh.ErrorReturnCode_1):
        pass
def status_service(
    service_name: str, *, get_sudo: bool = False, run_as: RunAsEnum = RunAsEnum.user
) -> None:
    """Run ``systemctl status`` for the project unit belonging to ``service_name``.

    The unit name is ``{PROJECT_NAME}_service_{service_name}.service``. When
    ``systemctl`` exits with status 3 the exception and its captured stdout are
    printed instead of propagating.

    :param service_name: Short service name; the project prefix is added here.
    :param get_sudo: Prompt for the sudo password and run inside the
        ``sh.contrib.sudo`` context (enabled through ``ContextWrapper``).
    :param run_as: ``RunAsEnum.user`` adds ``--user`` to the ``systemctl`` call.
    """
    unit_name = f"{PROJECT_NAME}_service_{service_name}"
    print(f"Status for {unit_name}")

    try:
        sudo_password = None
        if get_sudo:
            sudo_password = getpass.getpass(
                prompt=f"[sudo] password for {getpass.getuser()}: "
            )

        with ContextWrapper(
            sh.contrib.sudo,
            construction_kwargs=dict(password=sudo_password, _with=True),
            enabled=get_sudo,
        ):
            systemctl_args = []
            if run_as == RunAsEnum.user:
                systemctl_args.append("--user")
            systemctl_args.extend(["status", f"{unit_name}.service"])
            sh.systemctl(systemctl_args)
    except sh.ErrorReturnCode_3 as status_error:
        print(status_error, status_error.stdout)
def enable_service(
    service_name: str, *, get_sudo: bool = False, run_as: RunAsEnum = RunAsEnum.user
) -> None:
    """Run ``systemctl enable`` for the project unit, then start the service.

    The unit name is ``{PROJECT_NAME}_service_{service_name}.service``.
    ``start_service`` is called inside the same (optional) sudo context with
    ``get_sudo=False`` so no second password prompt is issued here.

    :param service_name: Short service name; the project prefix is added here.
    :param get_sudo: Prompt for the sudo password and run inside the
        ``sh.contrib.sudo`` context (enabled through ``ContextWrapper``).
    :param run_as: ``RunAsEnum.user`` adds ``--user`` to the ``systemctl`` call;
        the value is also forwarded to ``start_service``.
    """
    unit_name = f"{PROJECT_NAME}_service_{service_name}"
    print(f"Enabling {unit_name}")

    sudo_password = None
    if get_sudo:
        sudo_password = getpass.getpass(
            prompt=f"[sudo] password for {getpass.getuser()}: "
        )

    with ContextWrapper(
        sh.contrib.sudo,
        construction_kwargs=dict(password=sudo_password, _with=True),
        enabled=get_sudo,
    ):
        systemctl_args = []
        if run_as == RunAsEnum.user:
            systemctl_args.append("--user")
        systemctl_args.extend(["enable", f"{unit_name}.service"])
        sh.systemctl(systemctl_args)

        start_service(service_name, get_sudo=False, run_as=run_as)
def make_user(
    username: str = "demo_user",
    password: str = None,
    *,
    add_home: bool = True,
    home_dir: Path = None,
    allow_existing_user: bool = True,
    get_sudo: bool = True,
) -> None:
    """Create a system account with ``useradd`` unless it already exists.

    ``id -u`` is used to probe for the account. If it reports a non-zero uid
    the account is treated as existing: either ``FileExistsError`` is raised or
    the uid/gid are printed and the function returns without prompting for
    anything. Otherwise ``useradd`` is invoked, optionally inside the
    ``sh.contrib.sudo`` context (enabled through ``ContextWrapper``).

    :param username: Login name of the account to create.
    :param password: Clear-text password for the new account. When empty or
        ``None`` it is requested interactively without echo.
    :param add_home: Pass ``-m -d <dir>`` to ``useradd`` when true.
    :param home_dir: Home directory used with ``add_home``; defaults to
        ``/home/<username>``.
    :param allow_existing_user: When false, an existing account raises
        ``FileExistsError``.
    :param get_sudo: Prompt for the sudo password and run ``useradd`` under sudo.
    :raises FileExistsError: The account exists and ``allow_existing_user`` is
        false.
    """
    import crypt
    import sh
    import getpass

    useradd_args = []
    if add_home:
        useradd_args += [
            "-m",
            "-d",
            str(home_dir) if home_dir else f"/home/{username}",
        ]

    try:
        user_id = sh.id(["-u", username])
        if int(user_id):
            if not allow_existing_user:
                raise FileExistsError(f"user {username} already exists")
            group_id = sh.id(["-g", username])
            print(f"user {username} exists with id {user_id} and {group_id}")
            # useradd would only fail with exit status 9 ("name already in
            # use") from here on, so skip the password prompts entirely.
            return
    except (ValueError, sh.ErrorReturnCode_1):
        # `id` exited with status 1 or printed something non-numeric:
        # fall through and try to create the account.
        pass

    sudo_password = (
        getpass.getpass(prompt=f"[sudo] password for {getpass.getuser()}: ")
        if get_sudo
        else None
    )

    with ContextWrapper(
        sh.contrib.sudo,
        construction_kwargs=dict(password=sudo_password, _with=True),
        enabled=get_sudo,
    ):
        # Read the new password without echoing it to the terminal.
        clear_text = (
            password
            if password
            else getpass.getpass(prompt=f"new password for user {username}: ")
        )
        # Omitting the salt lets `crypt` pick a random salt with the strongest
        # available method, instead of a constant two-character DES salt.
        # NOTE(review): `useradd -p` still exposes the hash on the command
        # line; consider `chpasswd` via stdin if that matters here.
        hashed_password = crypt.crypt(clear_text)
        try:
            sh.useradd(useradd_args + ["-p", f"{hashed_password}", f"{username}"])
        except sh.ErrorReturnCode_9:
            # useradd exit status 9: the account was created in the meantime
            # or the name is otherwise taken; keep this best-effort.
            pass
def remove_service(
    service_name: str,
    *,
    remove_app_user: bool = True,
    get_sudo: bool = False,
    run_as: RunAsEnum = RunAsEnum.user,
) -> None:
    """Disable a project service, delete its unit file and reload systemd.

    The unit is ``{PROJECT_NAME}_service_{service_name}``; its file location is
    resolved by ``target_service_path``. All steps run inside one optional
    ``sh.contrib.sudo`` context (enabled through ``ContextWrapper``), which is
    why the nested helpers are called with ``get_sudo=False``. A command that
    exits with status 1 aborts the remaining steps silently.

    :param service_name: Short service name; the project prefix is added here.
    :param remove_app_user: When ``run_as`` is ``RunAsEnum.app_user``, also
        delete the dedicated ``<service_name>_user`` account.
    :param get_sudo: Prompt for the sudo password and run under sudo.
    :param run_as: Forwarded to ``disable_service`` and ``target_service_path``.
    """
    try:
        sudo_password = (
            getpass.getpass(prompt=f"[sudo] password for {getpass.getuser()}: ")
            if get_sudo
            else None
        )
        with ContextWrapper(
            sh.contrib.sudo,
            construction_kwargs=dict(password=sudo_password, _with=True),
            enabled=get_sudo,
        ):
            disable_service(service_name, get_sudo=False, run_as=run_as)

            project_service_name = f"{PROJECT_NAME}_service_{service_name}"
            target_service_file_path = target_service_path(
                project_service_name, run_as=run_as
            )
            print(f"Removing {target_service_file_path}")
            sh.rm(target_service_file_path)
            sh.systemctl("daemon-reload")

            if run_as == RunAsEnum.app_user and remove_app_user:
                # Clean up the dedicated service account. `remove_user` has no
                # `run_as` parameter; passing one raised TypeError before.
                remove_user(service_name + "_user", get_sudo=False)
    except sh.ErrorReturnCode_1:
        # Best effort: a step exiting with status 1 (e.g. file already gone)
        # is deliberately not treated as an error.
        pass
def predictor_response_train_model(
    model,
    *,
    train_iterator,
    criterion,
    optimiser,
    scheduler,
    writer,
    interrupted_path,
    val_data_iterator=None,
    num_updates: int = 250000,
    device=global_torch_device(),
    early_stop=None,
    debug=False,
):
    """Train ``model`` for up to ``num_updates`` updates and return it with the
    best-validation weights loaded.

    Every update performs one optimisation step on a batch from
    ``train_iterator`` and, when ``val_data_iterator`` is given, one evaluation
    on a batch from it. Whenever the validation loss improves, the weights are
    remembered and also saved to ``interrupted_path``. ``KeyboardInterrupt``
    ends training gracefully.

    :param model: Network to train; modified in place.
    :param train_iterator: Iterator yielding ``(input, label)`` training batches.
    :param criterion: Loss callable taking ``(prediction, label)``.
    :param optimiser: Optimiser stepped once per update.
    :param scheduler: Optional LR scheduler stepped once per update.
    :param writer: Logger exposing ``scalar(tag, value, step)``.
    :param interrupted_path: Where the best state dict is written.
    :param val_data_iterator: Optional iterator yielding validation batches.
    :param num_updates: Maximum number of updates.
    :param device: Device the batches are moved to.
    :param early_stop: Optional threshold; training stops once the validation
        loss drops below it.
    :param debug: Run under ``torch.autograd.detect_anomaly`` when true.
    :return: ``model`` with the best recorded weights loaded.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = 1e10
    since = time.time()

    try:
        sess = tqdm(range(num_updates), leave=False, disable=False)
        val_loss = 0
        update_loss = 0
        val_acc = 0
        with ContextWrapper(torch.autograd.detect_anomaly, enabled=debug):
            for update_i in sess:
                stop_early = False
                for phase in [SplitEnum.training, SplitEnum.validation]:
                    if phase == SplitEnum.training:
                        with TorchTrainSession(model):
                            inputs, true_label = next(train_iterator)
                            # Tile dim 1 three times; presumably turns
                            # single-channel images into RGB — TODO confirm.
                            rgb_imgs = to_tensor(
                                inputs, dtype=torch.float, device=device
                            ).repeat(1, 3, 1, 1)
                            true_label = to_tensor(
                                true_label, dtype=torch.long, device=device
                            )
                            optimiser.zero_grad()
                            pred = model(rgb_imgs)
                            loss = criterion(pred, true_label)
                            loss.backward()
                            optimiser.step()
                            update_loss = loss.data.cpu().numpy()
                            writer.scalar("loss/train", update_loss, update_i)
                            if scheduler:
                                scheduler.step()
                    elif val_data_iterator:
                        with TorchEvalSession(model):
                            test_rgb_imgs, test_true_label = next(val_data_iterator)
                            test_rgb_imgs = to_tensor(
                                test_rgb_imgs, dtype=torch.float, device=device
                            ).repeat(1, 3, 1, 1)
                            test_true_label = to_tensor(
                                test_true_label, dtype=torch.long, device=device
                            )
                            with torch.no_grad():
                                val_pred = model(test_rgb_imgs)
                                val_loss = criterion(val_pred, test_true_label)
                            _, cat = torch.max(val_pred, -1)
                            val_acc = torch.sum(cat == test_true_label) / float(
                                cat.size(0)
                            )
                            writer.scalar("loss/acc", val_acc, update_i)
                            writer.scalar("loss/val", val_loss, update_i)
                            if val_loss < best_val_loss:
                                best_val_loss = val_loss
                                best_model_wts = copy.deepcopy(model.state_dict())
                                sess.write(
                                    f"New best validation model at update {update_i} with best_val_loss {best_val_loss}"
                                )
                                torch.save(model.state_dict(), interrupted_path)
                        # Compare the scalar validation loss (not the raw
                        # prediction tensor) against the threshold.
                        if early_stop is not None and val_loss < early_stop:
                            stop_early = True
                            break
                    sess.set_description_str(
                        f"Update {update_i} - {phase} "
                        f"update_loss:{update_loss:2f} "
                        f"val_loss:{val_loss}"
                        f"val_acc:{val_acc}"
                    )
                if stop_early:
                    # Leave the update loop too; breaking only the phase loop
                    # would let training continue with the next update.
                    break
    except KeyboardInterrupt:
        print("Interrupt")

    model.load_state_dict(best_model_wts)  # load best model weights
    time_elapsed = time.time() - since
    print(f"{time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val loss: {best_val_loss:3f}")

    return model
def loss_grad_check(
    model: torch.nn.Module,
    loss_fn: callable,
    input: torch.Tensor,
    target: torch.Tensor,
    epsilon: float = 1e-6,
    error_tolerance: float = 1e-5,
) -> None:
    """Compare analytical gradients with a two-sided numerical approximation.

    The original author marked this routine as NOT working and recommends
    ``torch/autograd/gradcheck.py`` instead; treat results with care.

    The analytical gradients come from one ``backward`` pass on ``model``. The
    numerical ones are obtained on a deep copy by shifting ``c_p[i][j]`` by
    ``+epsilon`` and ``-epsilon`` and re-evaluating the loss. Parameters whose
    first-dimension slices are 0-dimensional (i.e. 1-D parameters) are skipped.

    :param model: Module under test; its ``.grad`` fields are populated.
    :param loss_fn: Callable taking ``(model_output, target)`` and returning a
        scalar loss.
    :param input: Input passed to the model.
    :param target: Target passed to ``loss_fn``.
    :param epsilon: Perturbation size; must be positive.
    :param error_tolerance: Maximum allowed relative deviation.
    :raises AssertionError: ``epsilon`` is not positive, or a deviation exceeds
        ``error_tolerance``.
    """
    assert epsilon > 0.0
    c_model = copy.deepcopy(model)

    loss = loss_fn(model(input), target)
    loss.backward()

    compute_gradients = False
    # NOTE(review): second positional argument is presumably `enabled`.
    with ContextWrapper(torch.no_grad, not compute_gradients):
        with TorchEvalSession(model):
            for (n, c_p), p in zip(
                named_trainable_parameters(c_model).items(),
                trainable_parameters(model),
            ):
                for i, c_p_o in enumerate(c_p):
                    a = c_p_o.size()
                    if len(a) == 0:
                        continue

                    for j in range(a[0]):
                        cp_orig = c_p.data.clone()

                        c_p[i][j] += epsilon  # positive
                        loss_p = loss_fn(
                            c_model(input.clone()), target.clone()
                        ).clone()
                        # Restore by value. Rebinding `c_p.data = cp_orig`
                        # would alias the backup, so the next in-place shift
                        # would corrupt it and leave the parameter drifted.
                        c_p.data.copy_(cp_orig)

                        c_p[i][j] -= epsilon  # negative
                        loss_n = loss_fn(
                            c_model(input.clone()), target.clone()
                        ).clone()
                        c_p.data.copy_(cp_orig)

                        # TODO: make check based on the entire set of parameters at once
                        grad_approx = (loss_p - loss_n) / (2 * epsilon)
                        denom = math.sqrt(grad_approx**2) + math.sqrt(
                            p.grad[i][j] ** 2
                        )
                        if denom > 0:
                            deviance = (
                                math.sqrt((grad_approx - p.grad[i][j]) ** 2) / denom
                            )
                            assert (
                                deviance <= error_tolerance
                            ), f"Numerical gradient approximation of parameter {n} deviates larger than tolerance {error_tolerance}, deviance: {deviance}, approx:{grad_approx, loss_p, loss_n}, p.grad[i][j]:{p.grad[i][j]}"
def install_service(
    service_entry_point_path: Path,
    service_name: str,
    *,
    description: str = None,
    auto_enable: bool = True,
    run_as: RunAsEnum = RunAsEnum.user,
    # get_sudo: bool = False,
    restart: RestartServiceEnum = RestartServiceEnum.on_failure,
) -> None:
    """Write a systemd unit file for a Python entry point and optionally enable it.

    The unit is named ``{PROJECT_NAME}_service_{service_name}`` and its file
    location is resolved by ``target_service_path``. The file is rendered from
    ``SERVICE_TEMPLATE``, ownership and modes of the unit file and the entry
    point are adjusted, and ``systemctl daemon-reload`` is run. Sudo is
    requested automatically whenever ``run_as`` is not ``RunAsEnum.user``.

    :param service_entry_point_path: Existing ``.py`` file to run as the service.
    :param service_name: Short service name; the project prefix is added here.
    :param description: Unit description; a default mentioning ``service_name``
        is used when empty.
    :param auto_enable: Call ``enable_service`` after installation.
    :param run_as: Selects the service user, group and systemd target.
    :param restart: Restart policy; its ``.value`` is written into the unit.
    :raises AssertionError: The entry point is not an existing ``.py`` file, or
        the resolved group name differs from the expected service group.
    :raises ValueError: ``run_as`` is not one of the handled enum members.
    """
    assert (service_entry_point_path.is_file()
            and service_entry_point_path.name.endswith(".py"))
    project_service_name = f"{PROJECT_NAME}_service_{service_name}"
    user = getpass.getuser()

    systemd_service_file_path = target_service_path(project_service_name,
                                                    run_as=run_as)
    print(f"Installing {systemd_service_file_path}")
    # Only installs for the invoking user avoid the sudo prompt.
    get_sudo = run_as != RunAsEnum.user
    with ContextWrapper(
            sh.contrib.sudo,
            construction_kwargs=dict(
                password=(getpass.getpass(
                    prompt=f"[sudo] password for {user}: ")
                          if get_sudo else None),
                _with=True,
            ),
            enabled=get_sudo,
    ):
        # Pick the account, group and target the unit will be bound to.
        if run_as == RunAsEnum.app_user:
            service_user = service_name + "_user"
            # Sudo context is already active, so no second prompt is needed.
            make_user(service_user, get_sudo=False)
            service_target = "default.target"
            service_group = service_user
        elif run_as == RunAsEnum.root:
            # NOTE(review): this literal looks masked/redacted; presumably it
            # should be the root account name — confirm before relying on it.
            service_user = "******"
            service_target = "multi-user.target"
            service_group = service_user
        elif run_as == RunAsEnum.user:
            service_user = user
            service_target = "default.target"
            service_group = service_user
        else:
            raise ValueError

        # Make sure the unit file exists before inspecting its owner.
        sh.touch(systemd_service_file_path)
        # Primary group name of the service user, as printed by `id -g -n`.
        group_name = str(sh.id(["-g", "-n", service_user])).strip("\n")
        assert service_group == group_name
        # Third space-separated field of the `ls -l` output is read as the owner.
        current_owner = sh.ls("-l", systemd_service_file_path).split(" ")[2]
        if current_owner != service_user:  # SETTING UP PERMISSIONS
            print(
                f"Changing owner of service file from {current_owner} to {service_user}"
            )
            if run_as == RunAsEnum.root:
                # Empty group: chown then falls back to the owner's login group.
                group_name = ""
            else:
                print(f"with common group {group_name}")
                # group_id = sh.id(["-g", service_user])
                sh.usermod(["-a", "-G", group_name,
                            user])  # TODO: Polluting groups of user

            # Temporarily hand both files to the invoking user so the unit
            # file can be written below.
            sh.chown(
                [f"{user}:{group_name}", service_entry_point_path]
            )  # If a colon but no group name follows the user name, that user is made the owner of the files and the group of the files is changed to that user's login group.
            sh.chown(
                [f"{user}:{group_name}", systemd_service_file_path]
            )  # If a colon but no group name follows the user name, that user is made the owner of the files and the group of the files is changed to that user's login group.

        print("writing service file")
        if not description:
            description = f"heimdallr service for {service_name}"
        with open(systemd_service_file_path, "w") as f:
            # NOTE(review): writelines() on a str writes it character by
            # character; the resulting file content equals a single write().
            f.writelines(
                SERVICE_TEMPLATE.format(
                    service_name=project_service_name,
                    service_user=service_user,
                    executable=sys.executable,
                    description=description,
                    service_entry_point_path=service_entry_point_path,
                    service_target=service_target,
                    service_group=service_group,
                    restart=restart.value,
                ))
        # Give the finished unit file to the service account.
        sh.chown(
            [f"{service_user}:{group_name}", systemd_service_file_path]
        )  # If a colon but no group name follows the user name, that user is made the owner of the files and the group of the files is changed to that user's login group.
        sh.chmod(["664", systemd_service_file_path])
        sh.chmod(["774", service_entry_point_path])
        sh.systemctl("daemon-reload")  # TODO: Requires sudo?

        if auto_enable:
            # Still inside the sudo context, hence get_sudo=False.
            enable_service(service_name, get_sudo=False, run_as=run_as)