def slurm_prep(log_dir, niter=10000, partition_name='debug',
               this_dir=os.getcwd()):
    """
    Prep the SLURM runs.

    @param log_dir: The directory to store the results in.

    @param niter: The number of iterations to perform.

    @param partition_name: The partition name of the cluster to use.

    @param this_dir: The full path to the directory where this file is
    located.
    """

    # Get the configuration details
    static_config, dynamic_config = parallel_params(log_dir, niter)

    # Create the runs
    for i in xrange(1, niter + 1):
        # Build the initial params
        params = {k: v.rvs() for k, v in sorted(dynamic_config.items())}
        for k, v in static_config.items():
            params[k] = v

        # Create the base directory
        dir = params['log_dir']
        splits = os.path.basename(dir).split('-')
        dir = os.path.join(os.path.dirname(dir),
                           '-'.join(s for s in splits[:-1]))
        try:
            os.makedirs(dir)
        except OSError:
            pass

        # Dump the params as JSON
        s = json.dumps(params, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(dir, 'config.json'), 'wb') as f:
            f.write(s)

        # Create the runner
        mnist_runner_path = os.path.join(this_dir,
                                         'mnist_novelty_detection.py')
        command = 'python "{0}" "{1}"'.format(mnist_runner_path, dir)
        runner_path = os.path.join(dir, 'runner.sh')
        job_name = str(i)
        stdio_path = os.path.join(dir, 'stdio.txt')
        stderr_path = os.path.join(dir, 'stderr.txt')
        create_runner(command=command, runner_path=runner_path,
                      job_name=job_name, partition_name=partition_name,
                      stdio_path=stdio_path, stderr_path=stderr_path,
                      time_limit='00-00:45:00', memory_limit=512)

        # Execute the runner
        execute_runner(runner_path)

def launch_top_runs(top_paths, bp, command, auto_pupdate=False,
                    partition_name='debug', time_limit='04-00:00:00',
                    memory_limit=2048):
    """
    Launch the top runs.

    @param top_paths: The full paths to the base directories containing the
    top results.

    @param bp: The new base directory.

    @param command: The base command to execute in the runner. Two additional
    arguments will be passed - the base directory and the fold index.

    @param auto_pupdate: If True, the permanence increment and decrement
    amounts will automatically be computed by the runner. If False, the ones
    specified in the config file will be used.

    @param partition_name: The partition name to use.

    @param time_limit: The maximum time limit.

    @param memory_limit: The maximum memory requirements in MB.
    """

    for p in top_paths:
        # Path where the run should occur
        job_name = os.path.basename(p)
        p2 = os.path.join(bp, job_name)
        try:
            os.makedirs(p2)
        except OSError:
            pass

        # Overwrite the files

        # Create the runner
        runner_path = os.path.join(p2, 'runner.sh')
        command_new = '{0} "{1}" "{2}" {3}'.format(command, p, p2,
                                                   int(auto_pupdate))
        stdio_path = os.path.join(p2, 'stdio.txt')
        stderr_path = os.path.join(p2, 'stderr.txt')
        create_runner(command=command_new, runner_path=runner_path,
                      job_name=job_name, partition_name=partition_name,
                      stdio_path=stdio_path, stderr_path=stderr_path,
                      time_limit=time_limit, memory_limit=memory_limit)

        # Execute the runner
        execute_runner(runner_path)

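# Hypothetical usage sketch for launch_top_runs; the paths, command, and
# partition name below are illustrative only and are not part of the
# original module.
def _example_launch_top_runs():
    top_paths = [os.path.join('results', 'mnist', '7'),
                 os.path.join('results', 'mnist', '42')]
    launch_top_runs(top_paths, os.path.join('results', 'mnist_top'),
                    'python "mnist_runner.py"', auto_pupdate=True,
                    partition_name='debug', time_limit='04-00:00:00',
                    memory_limit=2048)
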
def launch_missing(missing, command, partition_name='debug',
                   time_limit='00-04:00:00', memory_limit=512):
    """
    Launch the missing results on the cluster. This assumes that the
    directories follow the <run_instance>-<fold_instance> naming convention.

    @param missing: The missing items.

    @param command: The base command to execute in the runner. Two additional
    arguments will be passed - the base directory and the fold index.

    @param partition_name: The partition name to use.

    @param time_limit: The maximum time limit.

    @param memory_limit: The maximum memory requirements in MB.
    """

    # Execute each missing item
    for p in missing:
        # Build the SP kargs for the proper path
        bn, ix = os.path.basename(p).split('-')
        bp = os.path.join(os.path.dirname(p), bn)
        with open(os.path.join(bp, 'config.json'), 'r') as f:
            kargs = json.load(f)
        kargs['log_dir'] = p

        # Dump the arguments to a new file
        s = json.dumps(kargs, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(bp, 'config-{0}.json'.format(ix)), 'w') as f:
            f.write(s)

        # Create the runner
        runner_path = os.path.join(bp, 'runner-{0}.sh'.format(ix))
        job_name = os.path.basename(p)
        command_new = '{0} "{1}" {2}'.format(command, bp, ix)
        stdio_path = os.path.join(bp, 'stdio-{0}.txt'.format(ix))
        stderr_path = os.path.join(bp, 'stderr-{0}.txt'.format(ix))
        create_runner(command=command_new, runner_path=runner_path,
                      job_name=job_name, partition_name=partition_name,
                      stdio_path=stdio_path, stderr_path=stderr_path,
                      time_limit=time_limit, memory_limit=memory_limit)

        # Execute the runner
        execute_runner(runner_path)

def slurm_prep(log_dir, partition_name='debug', this_dir=os.getcwd()):
    """
    Prep the SLURM runs.

    @param log_dir: The directory to store the results in.

    @param partition_name: The partition name of the cluster to use.

    @param this_dir: The full path to the directory where this file is
    located.
    """

    # Create the runs
    i = 1
    for noise in np.linspace(0, 1, 101):
        for overlap in np.arange(0, 41):
            dir = os.path.join(log_dir, '{0}-{1}'.format(noise, overlap))

            # Create the base directory
            try:
                os.makedirs(dir)
            except OSError:
                pass

            # Dump the params as JSON
            s = json.dumps(create_base_config(dir), sort_keys=True, indent=4,
                           separators=(',', ': ')).replace('},', '},\n')
            with open(os.path.join(dir, 'config.json'), 'wb') as f:
                f.write(s)

            # Create the runner
            mnist_runner_path = os.path.join(this_dir,
                                             'novelty_detection_slurm.py')
            command = 'python "{0}" "{1}" "{2}" "{3}"'.format(
                mnist_runner_path, dir, noise, overlap)
            runner_path = os.path.join(dir, 'runner.sh')
            job_name = str(i)
            stdio_path = os.path.join(dir, 'stdio.txt')
            stderr_path = os.path.join(dir, 'stderr.txt')
            create_runner(command=command, runner_path=runner_path,
                          job_name=job_name, partition_name=partition_name,
                          stdio_path=stdio_path, stderr_path=stderr_path,
                          time_limit='00-00:10:00', memory_limit=128)

            # Execute the runner
            execute_runner(runner_path)
            i += 1

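# Note on the sweep above: one SLURM job is submitted per (noise, overlap)
# pair, i.e. 101 noise levels * 41 overlap amounts = 4141 jobs in total.
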
def launch_missing(missing, command, partition_name='debug',
                   time_limit='00-04:00:00', memory_limit=512):
    """
    Launch the missing results on the cluster. This assumes that the
    directories follow the <run_instance>-<fold_instance> naming convention.

    @param missing: The missing items.

    @param command: The base command to execute in the runner. Two additional
    arguments will be passed - the base directory and the fold index.

    @param partition_name: The partition name to use.

    @param time_limit: The maximum time limit.

    @param memory_limit: The maximum memory requirements in MB.
    """

    # Execute each missing item
    for p in missing:
        # Build the SP kargs for the proper path
        bn, ix = os.path.basename(p).split('-')
        bp = os.path.join(os.path.dirname(p), bn)
        with open(os.path.join(bp, 'config.json'), 'rb') as f:
            kargs = json.load(f)
        kargs['log_dir'] = p

        # Dump the arguments to a new file
        s = json.dumps(kargs, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(bp, 'config-{0}.json'.format(ix)), 'wb') as f:
            f.write(s)

        # Create the runner
        runner_path = os.path.join(bp, 'runner-{0}.sh'.format(ix))
        job_name = os.path.basename(p)
        command_new = '{0} "{1}" {2}'.format(command, bp, ix)
        stdio_path = os.path.join(bp, 'stdio-{0}.txt'.format(ix))
        stderr_path = os.path.join(bp, 'stderr-{0}.txt'.format(ix))
        create_runner(command=command_new, runner_path=runner_path,
                      job_name=job_name, partition_name=partition_name,
                      stdio_path=stdio_path, stderr_path=stderr_path,
                      time_limit=time_limit, memory_limit=memory_limit)

        # Execute the runner
        execute_runner(runner_path)

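# Hypothetical usage sketch for launch_missing, following the
# <run_instance>-<fold_instance> directory convention; the paths and command
# below are illustrative only and are not part of the original module.
def _example_launch_missing():
    missing = [os.path.join('results', 'mnist', '3-1'),
               os.path.join('results', 'mnist', '3-2')]
    launch_missing(missing, 'python "mnist_runner.py"',
                   partition_name='debug', time_limit='00-04:00:00',
                   memory_limit=512)
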
def main_slurm(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
               global_inhibition=True, partition_name='debug', seed=None):
    """
    Perform CV on a subset of the MNIST dataset, using SLURM. Iterations are
    run completely in parallel; splits within an iteration are run
    sequentially.

    @param log_dir: The directory to store the results in.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param niter: The number of parameter iterations to use.

    @param nsplits: The number of splits of the data to use.

    @param global_inhibition: If True, use global inhibition; otherwise, use
    local inhibition.

    @param partition_name: The partition name of the cluster to use.

    @param seed: The seed for the random number generators.
    """

    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
                                   global_inhibition, seed)

    # Create the runs
    for i in xrange(1, niter + 1):
        # Build the initial params
        param = {k: v.rvs() for k, v in sorted(params.items())}

        # Create the base directory
        dir = param['log_dir']
        splits = os.path.basename(dir).split('-')
        dir = os.path.join(os.path.dirname(dir),
                           '-'.join(s for s in splits[:-1]))
        try:
            os.makedirs(dir)
        except OSError:
            pass

        # Dump the CV data
        with open(os.path.join(dir, 'cv.pkl'), 'wb') as f:
            cPickle.dump(list(cv), f, cPickle.HIGHEST_PROTOCOL)

        # Build the full params
        for k, v in kargs.items():
            if k != 'clf':  # Add the classifier later
                param[k] = v

        # Dump the params as JSON
        s = json.dumps(param, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(dir, 'config.json'), 'wb') as f:
            f.write(s)

        # Create the runner
        mnist_runner_path = os.path.join(
            pkgutil.get_loader('mHTM.examples').filename, 'mnist_runner.py')
        command = 'python "{0}" "{1}"'.format(mnist_runner_path, dir)
        runner_path = os.path.join(dir, 'runner.sh')
        job_name = str(i)
        stdio_path = os.path.join(dir, 'stdio.txt')
        stderr_path = os.path.join(dir, 'stderr.txt')
        create_runner(command=command, runner_path=runner_path,
                      job_name=job_name, partition_name=partition_name,
                      stdio_path=stdio_path, stderr_path=stderr_path)

        # Execute the runner
        execute_runner(runner_path)

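# Hypothetical invocation sketch for the MNIST CV search above; the log
# directory, sample counts, and seed are illustrative only and are not part
# of the original module.
def _example_main_slurm():
    main_slurm(os.path.join('results', 'mnist_cv'), ntrain=800, ntest=200,
               niter=5, nsplits=3, global_inhibition=True,
               partition_name='debug', seed=123456789)
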
def run_experiment(experiments, base_dir, nsamples=500, nbits=100,
                   pct_active=0.4, pct_noise=0.15, seed=123456789,
                   ntrials=10, partition_name='debug', this_dir=os.getcwd()):
    """
    Run an experiment for the SP. The experiment varies sets of SP parameters
    on the SP dataset and uses SLURM to conduct the runs.

    @param experiments: A list containing the experiment details. Refer to
    one of the examples in this module for more details.

    @param base_dir: The base directory to use for logging.

    @param nsamples: The number of samples to add to the dataset.

    @param nbits: The number of bits each sample should have.

    @param pct_active: The percentage of bits that will be active in the base
    class SDR.

    @param pct_noise: The percentage of noise to add to the data.

    @param seed: The seed used to initialize the random number generator.

    @param ntrials: The number of parameter trials to use. Each iteration
    will be used to initialize the SP in a different manner.

    @param partition_name: The partition name of the cluster to use.

    @param this_dir: The full path to the directory where this file is
    located.
    """

    # Create the dataset
    data = SPDataset(nsamples, nbits, pct_active, pct_noise, seed).data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Prep each experiment for execution
    for experiment_name, time_limit, memory_limit, params in experiments:
        # Iterate through each inhibition type
        for i, global_inhibition in enumerate((True, False)):
            # Get the base configuration
            base_config = create_base_config(base_dir, experiment_name,
                                             global_inhibition)

            # Add the parameters
            for param_name, param_value in params:
                base_config[param_name] = param_value
            config_gen = ConfigGenerator(base_config, ntrials)

            # Make the configurations
            for config in config_gen.get_config():
                # Make the base directory
                dir = config['log_dir']
                splits = os.path.basename(dir).split('-')
                base_name = '-'.join(s for s in splits[:-1])
                dir = os.path.join(os.path.dirname(dir), base_name)
                try:
                    os.makedirs(dir)
                except OSError:
                    pass

                # Dump the config as JSON
                s = json.dumps(config, sort_keys=True, indent=4,
                               separators=(',', ': ')).replace('},', '},\n')
                with open(os.path.join(dir, 'config.json'), 'w') as f:
                    f.write(s)

                # Dump the dataset and the metrics
                with open(os.path.join(dir, 'dataset.pkl'), 'wb') as f:
                    pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
                    pickle.dump(
                        (uniqueness_data, overlap_data, correlation_data),
                        f, pickle.HIGHEST_PROTOCOL)

                # Create the runner
                this_path = os.path.join(this_dir,
                                         'parameter_exploration.py')
                command = 'python "{0}" "{1}" {2} {3}'.format(
                    this_path, dir, ntrials, seed)
                runner_path = os.path.join(dir, 'runner.sh')
                job_name = '{0}_{1}{2}'.format(
                    experiment_name,
                    'G' if global_inhibition else 'L',
                    base_name)
                stdio_path = os.path.join(dir, 'stdio.txt')
                stderr_path = os.path.join(dir, 'stderr.txt')
                create_runner(command=command, runner_path=runner_path,
                              job_name=job_name,
                              partition_name=partition_name,
                              stdio_path=stdio_path,
                              stderr_path=stderr_path,
                              time_limit=time_limit[i],
                              memory_limit=memory_limit)

                # Execute the runner
                execute_runner(runner_path)

def main_slurm(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
               global_inhibition=True, partition_name='debug', seed=None):
    """
    Perform CV on a subset of the MNIST dataset, using SLURM. Iterations are
    run completely in parallel; splits within an iteration are run
    sequentially.

    @param log_dir: The directory to store the results in.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param niter: The number of parameter iterations to use.

    @param nsplits: The number of splits of the data to use.

    @param global_inhibition: If True, use global inhibition; otherwise, use
    local inhibition.

    @param partition_name: The partition name of the cluster to use.

    @param seed: The seed for the random number generators.
    """

    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
                                   global_inhibition, seed)

    # Create the runs
    for i in xrange(1, niter + 1):
        # Build the initial params
        param = {k: v.rvs() for k, v in sorted(params.items())}

        # Create the base directory
        dir = param['log_dir']
        splits = os.path.basename(dir).split('-')
        dir = os.path.join(os.path.dirname(dir),
                           '-'.join(s for s in splits[:-1]))
        try:
            os.makedirs(dir)
        except OSError:
            pass

        # Dump the CV data
        with open(os.path.join(dir, 'cv.pkl'), 'wb') as f:
            cPickle.dump(list(cv), f, cPickle.HIGHEST_PROTOCOL)

        # Build the full params
        for k, v in kargs.items():
            if k != 'clf':  # Add the classifier later
                param[k] = v

        # Dump the params as JSON
        s = json.dumps(param, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(dir, 'config.json'), 'wb') as f:
            f.write(s)

        # Create the runner
        mnist_runner_path = os.path.join(
            pkgutil.get_loader('mHTM.examples').filename, 'mnist_runner.py')
        command = 'python "{0}" "{1}"'.format(mnist_runner_path, dir)
        runner_path = os.path.join(dir, 'runner.sh')
        job_name = str(i)
        stdio_path = os.path.join(dir, 'stdio.txt')
        stderr_path = os.path.join(dir, 'stderr.txt')
        create_runner(command=command, runner_path=runner_path,
                      job_name=job_name, partition_name=partition_name,
                      stdio_path=stdio_path, stderr_path=stderr_path)

        # Execute the runner
        execute_runner(runner_path)

def run_experiment(experiments, base_dir, nsamples=500, nbits=100,
                   pct_active=0.4, pct_noise=0.15, seed=123456789,
                   ntrials=10, partition_name='debug', this_dir=os.getcwd()):
    """
    Run an experiment for the SP. The experiment varies sets of SP parameters
    on the SP dataset and uses SLURM to conduct the runs.

    @param experiments: A list containing the experiment details. Refer to
    one of the examples in this module for more details.

    @param base_dir: The base directory to use for logging.

    @param nsamples: The number of samples to add to the dataset.

    @param nbits: The number of bits each sample should have.

    @param pct_active: The percentage of bits that will be active in the base
    class SDR.

    @param pct_noise: The percentage of noise to add to the data.

    @param seed: The seed used to initialize the random number generator.

    @param ntrials: The number of parameter trials to use. Each iteration
    will be used to initialize the SP in a different manner.

    @param partition_name: The partition name of the cluster to use.

    @param this_dir: The full path to the directory where this file is
    located.
    """

    # Create the dataset
    data = SPDataset(nsamples, nbits, pct_active, pct_noise, seed).data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Prep each experiment for execution
    for experiment_name, time_limit, memory_limit, params in experiments:
        # Iterate through each inhibition type
        for i, global_inhibition in enumerate((True, False)):
            # Get the base configuration
            base_config = create_base_config(base_dir, experiment_name,
                                             global_inhibition)

            # Add the parameters
            for param_name, param_value in params:
                base_config[param_name] = param_value
            config_gen = ConfigGenerator(base_config, ntrials)

            # Make the configurations
            for config in config_gen.get_config():
                # Make the base directory
                dir = config['log_dir']
                splits = os.path.basename(dir).split('-')
                base_name = '-'.join(s for s in splits[:-1])
                dir = os.path.join(os.path.dirname(dir), base_name)
                try:
                    os.makedirs(dir)
                except OSError:
                    pass

                # Dump the config as JSON
                s = json.dumps(config, sort_keys=True, indent=4,
                               separators=(',', ': ')).replace('},', '},\n')
                with open(os.path.join(dir, 'config.json'), 'wb') as f:
                    f.write(s)

                # Dump the dataset and the metrics
                with open(os.path.join(dir, 'dataset.pkl'), 'wb') as f:
                    cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(
                        (uniqueness_data, overlap_data, correlation_data),
                        f, cPickle.HIGHEST_PROTOCOL)

                # Create the runner
                this_path = os.path.join(this_dir,
                                         'parameter_exploration.py')
                command = 'python "{0}" "{1}" {2} {3}'.format(
                    this_path, dir, ntrials, seed)
                runner_path = os.path.join(dir, 'runner.sh')
                job_name = '{0}_{1}{2}'.format(
                    experiment_name,
                    'G' if global_inhibition else 'L',
                    base_name)
                stdio_path = os.path.join(dir, 'stdio.txt')
                stderr_path = os.path.join(dir, 'stderr.txt')
                create_runner(command=command, runner_path=runner_path,
                              job_name=job_name,
                              partition_name=partition_name,
                              stdio_path=stdio_path,
                              stderr_path=stderr_path,
                              time_limit=time_limit[i],
                              memory_limit=memory_limit)

                # Execute the runner
                execute_runner(runner_path)

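# The structure expected in ``experiments`` can be read off the unpacking in
# run_experiment: each entry is (experiment_name, time_limits, memory_limit,
# params), where time_limits is indexed by inhibition type (global first,
# then local) and params is an iterable of (parameter_name, value) pairs.
# The concrete names, limits, and values below are hypothetical and shown
# for illustration only.
def _example_run_experiment():
    experiments = [
        ('nactive',                       # experiment name
         ('00-01:00:00', '01-00:00:00'),  # (global, local) time limits
         512,                             # memory limit in MB
         [('nactive', 40)]),              # (parameter, value) pairs
    ]
    run_experiment(experiments, os.path.join('results', 'param_exploration'))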