def diff_hw_config(hardware_cfg: Path) -> None:
    """Show how the hardware configuration would change if it were regenerated.

    The output of ``nixos-generate-config --show-hardware-config`` is compared
    (ignoring whitespace) against the existing file and the resulting unified
    diff is handed to ``delta`` for display.

    :param hardware_cfg: path to ``hosts/$(hostname)-hardware-configuration.nix``
    """
    info("Diff if `nixos-generate-config` was run:")
    # Report the equivalent shell pipeline; the individual steps are run
    # separately below so each exit status can be inspected.
    cmd(
        "nixos-generate-config --show-hardware-config | "
        f"diff --ignore-all-space {hardware_cfg} - | "
        "delta"
    )

    generated = get_output(["nixos-generate-config", "--show-hardware-config"])

    diff_argv = [
        "diff",
        "--report-identical-files",
        "--new-file",
        "--unified",
        "--ignore-all-space",
        str(hardware_cfg),
        "-",
    ]
    # diff exits with 1 when the inputs differ, which is not a failure here.
    unified_diff = get_output(diff_argv, input=generated, ok_returncodes=[0, 1])

    pager = subprocess.run(
        ["delta"], input=unified_diff, encoding="utf-8", check=False
    )
    if pager.returncode == 0:
        return
    warn(f"delta exited with non-zero return code {pager.returncode}")
def execute(data, num_folds=5):
    """
    Estimate the Root Mean Squared Error of closed-form linear regression
    using S-fold cross validation.

    :param data: Raw Data frame parsed from CSV
    :param num_folds: The number of folds (S) to use; must be greater than one
    :return: Root Mean Squared Error
    """
    assert data is not None, "data must be a valid DataFrame"
    assert num_folds > 1, "num_folds must be greater than one."

    # Steps 2-3: shuffle the samples, then partition them into S folds.
    folds = divide_data(util.randomize_data(data), num_folds)

    # Step 4: every fold takes one turn as the held-out test set.
    squared_errors = []
    for fold_index in xrange(num_folds):
        # (a) Fold `fold_index` is the test set; the other S - 1 folds train.
        held_out = folds[fold_index]
        training = select_training_data(folds, fold_index)

        # (b) Standardize the features using statistics of the training folds
        # only, then prepend the constant bias column.
        train_features, mean, std = util.standardize_data(
            util.get_features(training))
        train_features.insert(0, "Bias", 1)

        # (c) Fit the closed-form linear regression model.
        weights = cflr.find_weights(train_features, util.get_output(training))

        # (d) Squared error of each sample in the held-out fold.
        expected = util.get_output(held_out)
        predicted = cflr.apply_solution(
            util.get_features(held_out), mean, std, weights)
        squared_errors.append((expected - predicted) ** 2)

    # Step 5: combine the errors of all folds into a single RMSE.
    return compute_rmse(len(data), squared_errors)
def run(self, path):
    """Run ``self.cmd`` (with ``shell=True``) from inside ``path``.

    The process working directory is switched to ``path`` for the duration of
    the command and restored afterwards, even if the command raises.

    :param path: Directory to execute the command in
    :return: Whatever ``get_output`` returns for the command
    """
    previous_dir = os.getcwd()
    os.chdir(path)
    try:
        return get_output(self.cmd, shell=True)
    finally:
        # Always go back to where we started.
        os.chdir(previous_dir)
def test_osinfo_db_path_dir():
    """
    Test osinfo-db-path --dir: the directory given on the command line is
    printed back, followed by a newline.
    """
    argv = [util.Tools.db_path, util.ToolsArgs.DIR, FOOBAR_DIR]
    assert util.get_output(argv) == FOOBAR_DIR + "\n"
def test_osinfo_db_path_system():
    """
    Test osinfo-db-path --system: without OSINFO_SYSTEM_DIR set, the tool
    prints the "osinfo" directory below DATADIR.
    """
    # Make sure an inherited override cannot influence the result.
    os.environ.pop("OSINFO_SYSTEM_DIR", None)
    argv = [util.Tools.db_path, util.ToolsArgs.SYSTEM]
    assert util.get_output(argv) == os.path.join(DATADIR, "osinfo\n")
def test_osinfo_db_path_local():
    """
    Test osinfo-db-path --local: without OSINFO_LOCAL_DIR set, the tool
    prints the "osinfo" directory below SYSCONFDIR.
    """
    # Make sure an inherited override cannot influence the result.
    os.environ.pop("OSINFO_LOCAL_DIR", None)
    argv = [util.Tools.db_path, util.ToolsArgs.LOCAL]
    assert util.get_output(argv) == os.path.join(SYSCONFDIR, "osinfo\n")
def test_get_output(self):
    """
    Check that get_output runs the command and hands back what it printed.
    """
    command = '%s %s' % (self.echo, self.testdata)
    # echo replaces EOF with EOL, hence the trailing newline
    self.assertEqual('asdfqwer\n', util.get_output(command))
def git_cmd(path, *args, **kwargs):
    """Run a git command, optionally against the repository at ``path``.

    :param path: Working tree of the repository. When truthy, git is pointed
        at ``<path>/.git`` via ``--git-dir`` and at ``path`` via
        ``--work-tree``; when falsy, plain ``git`` is invoked.
    :param args: The git sub-command and its arguments
    :param kwargs: Forwarded unchanged to ``get_output``
    :return: Whatever ``get_output`` returns for the command
    :raises CmdError: Re-raised after the failing command and its exit code
        have been logged
    """
    if path:
        repo = os.path.join(path, ".git")
        cmds = ["git", "--git-dir", repo, "--work-tree", path] + list(args)
    else:
        cmds = ["git"] + list(args)
    try:
        return get_output(cmds, **kwargs)
    # `except X as e` is accepted by Python 2.6+ and Python 3; the old
    # `except X, e` spelling is a syntax error on Python 3.
    except CmdError as e:
        log("Error running git command: %r", e.cmd)
        log("Failure with code: %d", e.ret)
        raise
def execute(data, training_data_ratio=2.0 / 3.0, k=1):
    """
    Run "Locally-Weighted" Linear Regression (built on Closed-Form Linear
    Regression) and measure its error on the held-out samples.

    :param data: Raw Data frame parsed from CSV
    :param training_data_ratio: The fraction (0.0 to 1.0) of input data to use in training.
    :param k: Smoothing parameter for local weight computation
    :return: Root Mean Squared Error over the testing samples
    """
    # 2. Shuffle the samples
    shuffled = util.randomize_data(data)

    # 3. The leading share of the data trains the model, the rest tests it
    training_data, test_data = util.split_data(shuffled, training_data_ratio)
    training_outputs = util.get_output(training_data)

    # 4. Standardize the features using statistics of the training data, then
    #    prepend the constant bias column
    train_features, mean, std = util.standardize_data(
        util.get_features(training_data))
    train_features.insert(0, "Bias", 1)

    # The test features are scaled with the training mean / std
    test_features, _, _ = util.standardize_data(
        util.get_features(test_data), mean, std)
    test_features.insert(0, "Bias", 1)

    # 5. Fit and evaluate one local model per testing sample
    squared_errors = []
    for row in xrange(len(test_features)):
        sample = test_features.iloc[row]
        # The expected output is the last column of the matching raw row
        expected = test_data.loc[sample.name][-1]

        # (a) Local model for this query point
        theta = compute_theta_query(sample, train_features, training_outputs, k)

        # (b) Evaluate the testing sample using the local model
        predicted = np.dot(sample, theta)

        # (c) Squared error of the testing sample
        squared_errors.append(util.compute_se(expected, predicted))

    # 6. Root mean squared error (RMSE) over every testing sample
    mean_squared_error = sum(squared_errors) / len(squared_errors)
    return math.sqrt(mean_squared_error)
def test_osinfo_db_path_root():
    """
    Test osinfo-db-path --root FOOBAR_DIR --system: the system path is
    reported below the given root directory.
    """
    # Make sure an inherited override cannot influence the result.
    os.environ.pop("OSINFO_SYSTEM_DIR", None)
    argv = [
        util.Tools.db_path,
        util.ToolsArgs.ROOT,
        FOOBAR_DIR,
        util.ToolsArgs.SYSTEM,
    ]
    datadir_parts = DATADIR.split("/")
    assert util.get_output(argv) == os.path.join(
        FOOBAR_DIR, *datadir_parts, "osinfo\n")
def parse_args(cls) -> Args:
    """Parse the command line into an instance of ``cls``.

    Type-safe wrapper around ``argparse.ArgumentParser.parse_args``. When no
    hostname was given, the output of the ``hostname`` command is used.
    """
    parsed = cls._parser().parse_args()

    hostname = parsed.hostname
    if hostname is None:
        # Nothing on the command line; ask the system instead.
        hostname = get_output(["hostname"])

    return cls(
        verbose=parsed.verbose,
        update=parsed.update,
        force=parsed.force,
        diff=parsed.diff,
        hostname=hostname,
        dry_run=parsed.dry_run,
    )
def test_osinfo_db_path_user():
    """
    Test osinfo-db-path --user: with neither OSINFO_USER_DIR nor
    XDG_CONFIG_HOME set, the tool prints $HOME/.config/osinfo.
    """
    # Make sure inherited overrides cannot influence the result.
    for variable in ("OSINFO_USER_DIR", "XDG_CONFIG_HOME"):
        os.environ.pop(variable, None)
    argv = [util.Tools.db_path, util.ToolsArgs.USER]
    assert util.get_output(argv) == os.path.join(
        os.environ["HOME"], ".config", "osinfo\n")
def update_hw_config_force(hardware_cfg: Path) -> None:
    """Regenerate the hardware configuration and overwrite the existing file.

    No safety checks are performed. The new configuration is always
    generated, but it is only written when ``DRY_RUN`` is false.

    :param hardware_cfg: Path to ``hosts/$(hostname)-hardware-configuration.nix``
        file to replace.
    """
    verb = "Updating " if hardware_cfg.exists() else "Generating "
    info(verb + p(hardware_cfg))
    cmd(f"nixos-generate-config --show-hardware-config > {hardware_cfg}")

    generated = get_output(["nixos-generate-config", "--show-hardware-config"])
    if DRY_RUN:
        return
    hardware_cfg.write_text(generated)
def git_repo_root() -> Path:
    """Return the git repository root, relative to this file's directory.

    Because the result is relative to the directory containing this file, an
    empty path or a single dot (the current directory) is an unsurprising
    return value.
    """
    # The directory this file lives in doubles as the working directory for
    # git and as the base the result is made relative to.
    here = Path(__file__).parent

    dbg("Getting repository root directory.")
    toplevel = Path(get_output(["git", "rev-parse", "--show-toplevel"], cwd=here))

    # `relative_to` can raise, but this file is known to be in the repository.
    return toplevel.relative_to(here.absolute())
def word_count(self, base, path):
    """Run TeXCount on ``path`` and parse its ``key: value`` report lines.

    :param base: Directory the perl script is executed in
    :param path: File handed to TeXCount (``-inc`` is passed as well)
    :return: Tuple ``(raw_output, results)`` where ``results`` maps each
        reported file name (and ``"total"``) to a dict of its report fields
    """
    out = get_output(["perl", self.tex_count_path, "-inc", path], cwd=base)
    if self.debug:
        log("TeXCount: %s", out)

    results = {}
    section = None
    for line in out.splitlines():
        if ":" not in line:
            continue
        # Only the first two colon-separated fields are used.
        fields = line.split(":")
        key, value = fields[0].strip(), fields[1].strip()
        if key in ("File", "Included file"):
            # A new per-file section starts here.
            section = value
            results[section] = {}
        elif key == "File(s) total":
            section = "total"
            results[section] = {}
        elif section is not None:
            # Lines seen before any section header are dropped.
            results[section][key] = value
    return (out, results)
def update_hw_config(args: Args, repo_root: Path, hardware_cfg: Path) -> None:
    """Update ``hardware_cfg``, checking that it's not modified in the repo.

    If ``git status`` reports the hardware configuration as modified, the
    update is cancelled (``args.update`` is set to ``False``) unless
    ``args.force`` is set, in which case the local changes are overwritten.

    :param args: Parsed command-line arguments; ``force`` and ``update`` are
        read, and ``update`` may be cleared.
    :param repo_root: Root of the git repository; ``git status`` runs here and
        the reported file names are resolved against it.
    :param hardware_cfg: The hardware configuration file to update.
    """
    dbg("Getting `git status` to check if hardware config has been modified.")
    git_status = get_output(
        ["git", "status", "--porcelain", "--untracked-files"],
        cwd=repo_root,
    ).splitlines()

    # Determine if the local hardware configuration is modified by seeing if
    # any of the modified filenames match it.
    hardware_cfg_modified = any(
        # Slice the string to transform
        #     " M hosts/dahurica-hardware-configuration.nix"
        #      ^^^^
        #      0123
        # into
        #     "hosts/dahurica-hardware-configuration.nix"
        hardware_cfg.samefile(repo_root / filename[3:])
        for filename in git_status
        if GIT_STATUS_MODIFIED.match(filename)
    )

    dbg(
        "Hardware configuration "
        + ("has" if hardware_cfg_modified else f"has {BOLD}not{RESET_BOLD}")
        + " been modified."
    )

    if hardware_cfg_modified:
        # Refuse only when --force was NOT given. The check used to be
        # inverted, which overwrote uncommitted changes without --force and
        # refused to act with it.
        if not args.force:
            error(f"There are uncomitted changes to {p(hardware_cfg)};")
            error("refusing to overwrite with `nixos-generate-config`.")
            error("Either commit your changes or pass --force to overwrite local files")
            args.update = False
        else:
            info(
                f"There are uncomitted changes to {p(hardware_cfg)} but "
                + p("--force")
                + " was given; overwriting"
            )

    # We may have updated `args.update` above, so double-check it here.
    if args.update:
        # Note: `DRY_RUN` is handled in `update_hw_config_force`.
        update_hw_config_force(hardware_cfg)
def get_secret_ids():
    """
    List the available secret keys that can be used for decryption or signing.

    Parses the colon-delimited output of ``gpg --list-secret-keys
    --with-colons`` and collects the fifth field of every ``sec`` and ``ssb``
    record.

    return: A dictionary with a 'keys' list (from ``sec`` records) and a
            'subkeys' list (from ``ssb`` records)
    """
    listing = util.get_output('gpg --list-secret-keys --with-colons')

    keys = []
    subkeys = []
    for record in listing.split('\n'):
        if record.startswith('ssb'):
            subkeys.append(record.split(':')[4])
        elif record.startswith('sec'):
            keys.append(record.split(':')[4])

    return {'keys': keys, 'subkeys': subkeys}
def word_count(self, base, path):
    """Collect TeXCount statistics for ``path``.

    The perl script is run from ``base`` with ``-inc`` and each ``key: value``
    line of its report is sorted into a per-file dictionary.

    :param base: Working directory for the TeXCount invocation
    :param path: File to count
    :return: Tuple ``(raw_output, results)``; ``results`` is keyed by the
        reported file names plus ``"total"``
    """
    out = get_output(["perl", self.tex_count_path, "-inc", path], cwd=base)
    if self.debug:
        log("TeXCount: %s", out)

    results = {}
    current = None
    report_lines = (entry for entry in out.splitlines() if ":" in entry)
    for entry in report_lines:
        # Fields past the second colon-separated one are ignored.
        pieces = entry.split(":")
        label = pieces[0].strip()
        content = pieces[1].strip()

        if label in ("File", "Included file"):
            # Header of a per-file section, named after the file.
            current = content
            results[current] = {}
        elif label == "File(s) total":
            current = "total"
            results[current] = {}
        elif current is not None:
            # Lines before the first section header are skipped.
            results[current][label] = content
    return (out, results)
def execute(data):
    """
    Fit closed-form linear regression on a random 2/3 of the data and
    evaluate it on the remaining samples.

    :param data: Raw Data frame parsed from CSV
    :return: Tuple of (weights, root mean squared error on the test samples)
    """
    # 2. Shuffle the samples
    shuffled = util.randomize_data(data)

    # 3. The leading 2/3 of the data trains the model, the rest tests it
    training_data, test_data = util.split_data(shuffled, 2.0 / 3.0)

    # The last column holds the outputs the model should reproduce
    training_outputs = training_data[training_data.columns[-1]]

    # 4. Standardize the features using statistics of the training data, then
    #    prepend the constant bias column
    training_inputs, training_mean, training_std = util.standardize_data(
        util.get_features(training_data))
    training_inputs.insert(0, "Bias", 1)

    # 5. Closed-form solution of linear regression
    weights = find_weights(training_inputs, training_outputs)

    # 6. Apply the solution to the testing samples
    test_input = util.get_features(test_data)
    expected = util.get_output(test_data)
    predicted = apply_solution(test_input, training_mean, training_std, weights)

    # 7. Root mean squared error (RMSE) of the predictions
    return weights, util.compute_rmse(expected, predicted)
def execute(data, learning_rate=0.001, training_data_ratio=2.0 / 3, max_iterations=1000000):
    """
    Perform Batch Gradient Descent

    Iteration stops once the training RMSE changes by less than
    ``np.spacing(1)`` between two consecutive iterations, or once the
    iteration counter exceeds ``max_iterations``.

    :param data: Raw Data frame parsed from CSV
    :param learning_rate: The rate at which to advance along the gradient
    :param training_data_ratio: The percent of given data to use for training (remaining percent is used for testing)
    :param max_iterations: The maximum number of iterations to execute before exiting
    :return: Tuple of (theta, RMSE of the testing data under the final theta)
    """
    # NOTE: print is called with a single parenthesised argument so the output
    # is the same under Python 2 while the code also parses under Python 3.

    # 2. Randomizes the data
    print("Randomizing Data")
    randomized_data = util.randomize_data(data)

    # 3. Selects the first 2 / 3 (round up) of the data for training and the remaining for testing
    print("Selecting Training Data")
    training_data, test_data = util.split_data(randomized_data, training_data_ratio)

    # 4. Standardizes the data (except for the last column of course) based on the training data
    print("Standardizing Data")
    std_training_data, mean, std = util.standardize_data(
        util.get_features(training_data))
    std_training_data.insert(0, "Bias", 1)

    # The test features are scaled with the training mean / std
    std_test_data, _, _ = util.standardize_data(util.get_features(test_data), mean, std)
    std_test_data.insert(0, "Bias", 1)

    iteration = 0
    prior_rmse = 0
    current_rmse = 100  # Doesn't matter what this value is, so long as it doesn't equal prior rmse
    eps = np.spacing(1)
    N = len(std_training_data)

    # Start with randomized values for theta: one parameter per column of the
    # standardized training data (bias included). This used to be hard-coded
    # to 3, which only fits data sets with exactly two features.
    num_params = len(std_training_data.columns)
    theta = np.array([random.uniform(-1, 1) for _ in xrange(num_params)])

    # Capture our expected values for the training data
    expected = util.get_output(training_data)
    test_data_expected = util.get_output(test_data)

    # Capture the RMSE for test and training over all iterations
    test_rmse_values = []
    training_rmse_values = []

    print("Performing Gradient Descent Linear Regression")
    # 5. While the termination criteria (mentioned above in the implementation details) hasn't been met
    # NOTE(review): with `<=` and a counter starting at 0 the loop can run
    # max_iterations + 1 times; kept as-is to preserve existing results.
    while iteration <= max_iterations and abs(current_rmse - prior_rmse) >= eps:
        prior_rmse = current_rmse

        # (a) Compute the RMSE of the training data
        #     By applying the current theta values to the training set & comparing results
        actual = std_training_data.dot(theta)
        current_rmse = util.compute_rmse(expected, actual)

        # (b) While we can't let the testing set affect our training process, also compute the RMSE of
        #     the testing error at each iteration of the algorithm (it'll be interesting to see).
        #     Same thing as (a), but use test inputs / outputs
        test_data_actual = std_test_data.dot(theta)
        test_data_rmse = util.compute_rmse(test_data_expected, test_data_actual)

        # (c) Update each parameter using batch gradient descent
        #     By use of the learning rate. `actual` was computed before this
        #     loop, so every parameter is updated from the same predictions.
        for i in xrange(len(theta)):
            # theta has exactly one entry per column of std_training_data
            errors = (actual - expected) * std_training_data[std_training_data.columns[i]]
            cumulative_error = errors.sum()
            theta[i] -= learning_rate / N * cumulative_error

        iteration += 1
        test_rmse_values.append(test_data_rmse)
        training_rmse_values.append(current_rmse)

    print("Completed in {0} iterations".format(iteration))

    print("Plotting Errors")
    image_path = plot_rmse_values(test_rmse_values, training_rmse_values, learning_rate)
    print("Saved Image to '{0}'".format(image_path))

    # 6. Compute the RMSE of the testing data.
    print("Computing RMSE of Test Data")
    test_data_actual = std_test_data.dot(theta)
    test_data_rmse = util.compute_rmse(test_data_expected, test_data_actual)

    return theta, test_data_rmse
def run(self, path, *args, **kwargs):
    """Return the disk usage of ``path`` as reported by ``du``.

    ``du -chs --exclude=.git`` is run on ``path`` and the first
    whitespace-separated token of its last output line is returned. The extra
    positional and keyword arguments are accepted but not used.

    :param path: File or directory to measure
    :return: The size token from the last line of the ``du`` output
    """
    report = get_output(["du", "-chs", "--exclude=.git", path])
    last_line = report.splitlines()[-1]
    return last_line.split()[0]
def check_hw_config(host_cfg: Path, hardware_cfg: Path, old_hardware_cfg: Path) -> None:
    """Check that ``hardware_cfg`` exists and ``old_hardware_cfg`` doesn't.

    Also imports ``hardware_cfg`` in ``host_cfg`` if it's not otherwise
    imported. Nothing on disk is renamed, removed or rewritten when
    ``DRY_RUN`` is true.

    :param host_cfg: The host's nix configuration, which should import
        ``hardware_cfg``.
    :param hardware_cfg: The per-host hardware configuration file.
    :param old_hardware_cfg: Legacy hardware configuration location that
        should no longer exist.
    """
    # `old_hardware_cfg` is repo_root / "hardware-configuration.nix"; NixOS
    # might generate one by default while installing, so this check should stay
    # here even though all of my hosts have been using this script for a while
    # now.
    if old_hardware_cfg.is_symlink():
        # A relative link target is resolved against the directory holding the
        # symlink itself, i.e. `old_hardware_cfg.parent`.
        warn(
            f"{p(old_hardware_cfg)} is a symlink to "
            + p(old_hardware_cfg.parent / os.readlink(old_hardware_cfg))
        )
        warn("That's probably not needed; consider deleting it.")
    elif old_hardware_cfg.exists():
        if not hardware_cfg.exists():
            info(
                f"{p(old_hardware_cfg)} exists but {p(hardware_cfg)}"
                + f" doesn't, renaming {p(old_hardware_cfg)}."
            )
            if not DRY_RUN:
                old_hardware_cfg.rename(hardware_cfg)
        else:
            if filecmp.cmp(hardware_cfg, old_hardware_cfg):
                info(
                    f"{p(old_hardware_cfg)} and {p(hardware_cfg)} "
                    + "both exist but have the same contents, removing "
                    + p(old_hardware_cfg)
                )
                if not DRY_RUN:
                    old_hardware_cfg.unlink()
            else:
                error(
                    f"{p(old_hardware_cfg)} and {p(hardware_cfg)} "
                    + "both exist but have different contents."
                )
                error(
                    f"Determine which one is correct and move it to {p(hardware_cfg)}, "
                    + f"and then delete {p(old_hardware_cfg)}"
                )

    # Otherwise, if we don't have a hardware configuration yet, generate one.
    if not hardware_cfg.exists():
        update_hw_config_force(hardware_cfg)

    # `hardware_cfg`, relative to `host_cfg`, in a nix-import-friendly manner
    hardware_cfg_rel = f"./{hardware_cfg.name}"
    cfg_text = host_cfg.read_text(encoding="utf-8")
    if hardware_cfg_rel in cfg_text:
        dbg(f"It looks like {p(host_cfg)} already imports {p(hardware_cfg)}")
    else:
        info(f"{p(host_cfg)} doesn't import {p(hardware_cfg)}, attempting to add it")
        # Make the substitution: insert the path right after the opening
        # bracket of the first `imports = [` found at the start of a line.
        new_cfg_text, found_imports = re.subn(
            r"^\s*imports\s*=\s*\[",
            r"\g<0> " + hardware_cfg_rel + " ",
            cfg_text,
            count=1,
            flags=re.MULTILINE,
        )
        if not found_imports:
            error(
                f"Couldn't find a suitable import statement in {p(host_cfg)}; "
                + f"make sure to add an import to {p(hardware_cfg)}"
            )
        else:
            info(f"Writing and reformatting {p(host_cfg)}")
            if not DRY_RUN:
                # Write with the same encoding the file was read with instead
                # of the locale's default encoding.
                host_cfg.write_text(new_cfg_text, encoding="utf-8")
                # nixfmt rewrites the file in place, so it only runs when the
                # file was actually written.
                nixfmt_output = get_output(["nixfmt", str(host_cfg)])
                if nixfmt_output:
                    dbg("nixfmt reported:")
                    for line in nixfmt_output.splitlines():
                        dbg(line)