Example 1
def slurm_prep(log_dir, niter=10000, partition_name='debug',
	this_dir=os.getcwd()):
	"""
	Prep the SLURM runs.
	
	@param log_dir: The directory to store the results in.
	
	@param niter: The number of iterations to perform.
	
	@param partition_name: The partition name of the cluster to use.
	
	@param this_dir: The full path to the directory where this file is located.
	"""
	
	# Get the configuration details
	static_config, dynamic_config = parallel_params(log_dir, niter)
	
	# Create the runs
	for i in xrange(1, niter + 1):
		# Build the initial params
		params = {k:v.rvs() for k, v in sorted(dynamic_config.items())}
		for k, v in static_config.items():
			params[k] = v
		
		# Create the base directory
		dir = params['log_dir']
		splits = os.path.basename(dir).split('-')
		dir = os.path.join(os.path.dirname(dir),
			'-'.join(s for s in splits[:-1]))
		try:
			os.makedirs(dir)
		except OSError:
			pass
		
		# Dump the params as JSON
		s = json.dumps(params, sort_keys=True, indent=4,
			separators=(',', ': ')).replace('},', '},\n')
		with open(os.path.join(dir, 'config.json'), 'wb') as f:
			f.write(s)
		
		# Create the runner
		mnist_runner_path = os.path.join(this_dir,
			'mnist_novelty_detection.py')
		command = 'python "{0}" "{1}"'.format(mnist_runner_path, dir)
		runner_path = os.path.join(dir, 'runner.sh')
		job_name = str(i)
		stdio_path = os.path.join(dir, 'stdio.txt')
		stderr_path = os.path.join(dir, 'stderr.txt')
		create_runner(command=command, runner_path=runner_path,
			job_name=job_name, partition_name=partition_name,
			stdio_path=stdio_path, stderr_path=stderr_path,
			time_limit='00-00:45:00', memory_limit=512)
		
		# Execute the runner
		execute_runner(runner_path)
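All of these examples delegate the actual SLURM interaction to create_runner and execute_runner from the mHTM codebase, which are not shown here. The following is a minimal, hypothetical sketch of what such helpers plausibly do: write a bash script with standard #SBATCH directives and submit it with sbatch. The directive names are real SLURM options, but the function bodies are assumptions, not the library's implementation.

import subprocess

def create_runner(command, runner_path, job_name, partition_name,
                  stdio_path, stderr_path, time_limit='00-01:00:00',
                  memory_limit=512):
    # Hypothetical sketch only: write a bash script with standard SBATCH
    # directives. The real mHTM helper may differ.
    script = '\n'.join([
        '#!/bin/bash',
        '#SBATCH --job-name={0}'.format(job_name),
        '#SBATCH --partition={0}'.format(partition_name),
        '#SBATCH --output={0}'.format(stdio_path),
        '#SBATCH --error={0}'.format(stderr_path),
        '#SBATCH --time={0}'.format(time_limit),   # DD-HH:MM:SS
        '#SBATCH --mem={0}'.format(memory_limit),  # in MB
        command,
        ''
    ])
    with open(runner_path, 'w') as f:
        f.write(script)

def execute_runner(runner_path):
    # Hypothetical sketch only: submit the script to the SLURM scheduler.
    subprocess.call(['sbatch', runner_path])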
Example 2
def launch_top_runs(top_paths,
                    bp,
                    command,
                    auto_pupdate=False,
                    partition_name='debug',
                    time_limit='04-00:00:00',
                    memory_limit=2048):
    """
	Launch the top runs.
	
	@param top_paths: The full paths to the directories containing the top
	results.
	
	@param bp: The new base directory.
	
	@param command: The base command to execute in the runner. Three additional
	arguments will be passed: the original results directory, the new run
	directory, and the auto_pupdate flag.
	
	@param auto_pupdate: If True the permanence increment and decrement amounts
	will automatically be computed by the runner. If False, the ones specified
	in the config file will be used.
	
	@param partition_name: The partition name to use.
	
	@param time_limit: The maximum time limit.
	
	@param memory_limit: The maximum memory requirements in MB.
	"""

    for p in top_paths:
        # Path where the run should occur
        job_name = os.path.basename(p)
        p2 = os.path.join(bp, job_name)
        try:
            os.makedirs(p2)
        except OSError:
            pass  # Overwrite the files

        # Create the runner
        runner_path = os.path.join(p2, 'runner.sh')
        command_new = '{0} "{1}" "{2}" {3}'.format(command, p, p2,
                                                   int(auto_pupdate))
        stdio_path = os.path.join(p2, 'stdio.txt')
        stderr_path = os.path.join(p2, 'stderr.txt')
        create_runner(command=command_new,
                      runner_path=runner_path,
                      job_name=job_name,
                      partition_name=partition_name,
                      stdio_path=stdio_path,
                      stderr_path=stderr_path,
                      time_limit=time_limit,
                      memory_limit=memory_limit)

        # Execute the runner
        execute_runner(runner_path)
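A minimal usage sketch for launch_top_runs; the paths and the scoring script are hypothetical, and the only structural requirement from the code above is that the command accepts the original directory, the new directory, and the auto_pupdate flag as trailing arguments:

# Hypothetical invocation; directories and script name are placeholders.
top_paths = [
    '/scratch/mhtm/param_search/run-07',
    '/scratch/mhtm/param_search/run-23',
]
launch_top_runs(top_paths,
                bp='/scratch/mhtm/top_runs',
                command='python "full_cv_runner.py"',
                auto_pupdate=True,
                partition_name='work',
                time_limit='04-00:00:00',
                memory_limit=2048)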
Example 3
def launch_missing(missing,
                   command,
                   partition_name='debug',
                   time_limit='00-04:00:00',
                   memory_limit=512):
    """Launch the missing results on the cluster.

    Assumes that the directories follow the <run_instance>-<fold_instance>
    naming convention.

    @param missing: The missing items.

    @param command: The base command to execute in the runner. Two additional
    arguments will be passed - the base directory and the fold index.

    @param partition_name: The partition name to use.

    @param time_limit: The maximum time limit.

    @param memory_limit: The maximum memory requirements in MB.
    """
    # Execute each missing item
    for p in missing:
        # Build the SP kargs for the proper path
        bn, ix = os.path.basename(p).split('-')
        bp = os.path.join(os.path.dirname(p), bn)
        with open(os.path.join(bp, 'config.json'), 'r') as f:
            kargs = json.load(f)
        kargs['log_dir'] = p

        # Dump the arguments to a new file
        s = json.dumps(kargs, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(bp, 'config-{0}.json'.format(ix)), 'w') as f:
            f.write(s)

        # Create the runner
        runner_path = os.path.join(bp, 'runner-{0}.sh'.format(ix))
        job_name = os.path.basename(p)
        command_new = '{0} "{1}" {2}'.format(command, bp, ix)
        stdio_path = os.path.join(bp, 'stdio-{0}.txt'.format(ix))
        stderr_path = os.path.join(bp, 'stderr-{0}.txt'.format(ix))
        create_runner(command=command_new,
                      runner_path=runner_path,
                      job_name=job_name,
                      partition_name=partition_name,
                      stdio_path=stdio_path,
                      stderr_path=stderr_path,
                      time_limit=time_limit,
                      memory_limit=memory_limit)

        # Execute the runner
        execute_runner(runner_path)
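A usage sketch with hypothetical directory names; note that launch_missing splits the basename on '-', so each path must contain exactly one hyphen, i.e. <run_instance>-<fold_instance>:

# Hypothetical rerun of folds 2 and 4 of run 'run17'; the parent directory
# '/scratch/mhtm/cv/run17' must already hold the original config.json.
missing = [
    '/scratch/mhtm/cv/run17-2',
    '/scratch/mhtm/cv/run17-4',
]
launch_missing(missing,
               command='python "mnist_runner.py"',
               partition_name='debug',
               time_limit='00-04:00:00',
               memory_limit=512)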
Example 4
def slurm_prep(log_dir, partition_name='debug', this_dir=os.getcwd()):
    """
	Prep the SLURM runs.
	
	@param log_dir: The directory to store the results in.
	
	@param partition_name: The partition name of the cluster to use.
	
	@param this_dir: The full path to the directory where this file is located.
	"""

    # Create the runs
    i = 1
    for noise in np.linspace(0, 1, 101):
        for overlap in np.arange(0, 41):
            dir = os.path.join(log_dir, '{0}-{1}'.format(noise, overlap))

            # Create the base directory
            try:
                os.makedirs(dir)
            except OSError:
                pass

            # Dump the params as JSON
            s = json.dumps(create_base_config(dir),
                           sort_keys=True,
                           indent=4,
                           separators=(',', ': ')).replace('},', '},\n')
            with open(os.path.join(dir, 'config.json'), 'wb') as f:
                f.write(s)

            # Create the runner
            mnist_runner_path = os.path.join(this_dir,
                                             'novelty_detection_slurm.py')
            command = 'python "{0}" "{1}" "{2}" "{3}"'.format(
                mnist_runner_path, dir, noise, overlap)
            runner_path = os.path.join(dir, 'runner.sh')
            job_name = str(i)
            stdio_path = os.path.join(dir, 'stdio.txt')
            stderr_path = os.path.join(dir, 'stderr.txt')
            create_runner(command=command,
                          runner_path=runner_path,
                          job_name=job_name,
                          partition_name=partition_name,
                          stdio_path=stdio_path,
                          stderr_path=stderr_path,
                          time_limit='00-00:10:00',
                          memory_limit=128)

            # Execute the runner
            execute_runner(runner_path)

            i += 1
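For scale, the nested loops above submit one job per (noise, overlap) pair. A quick, illustrative check of the grid size and the resulting directory names:

import numpy as np

noise_levels = np.linspace(0, 1, 101)  # 0.00, 0.01, ..., 1.00
overlap_levels = np.arange(0, 41)      # 0, 1, ..., 40
print(len(noise_levels) * len(overlap_levels))  # 4141 jobs in total
# Each job gets its own directory, e.g. <log_dir>/0.13-7, containing
# config.json, runner.sh, stdio.txt and stderr.txt.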
Example 5
def slurm_prep(log_dir, partition_name='debug', this_dir=os.getcwd()):
	"""
	Prep the SLURM runs.
	
	@param log_dir: The directory to store the results in.
	
	@param partition_name: The partition name of the cluster to use.
	
	@param this_dir: The full path to the directory where this file is located.
	"""
	
	# Create the runs
	i = 1
	for noise in np.linspace(0, 1, 101):
		for overlap in np.arange(0, 41):
			dir = os.path.join(log_dir, '{0}-{1}'.format(noise, overlap))
			
			# Create the base directory
			try:
				os.makedirs(dir)
			except OSError:
				pass
			
			# Dump the params as JSON
			s = json.dumps(create_base_config(dir), sort_keys=True,
				indent=4, separators=(',', ': ')).replace('},', '},\n')
			with open(os.path.join(dir, 'config.json'), 'wb') as f:
				f.write(s)
			
			# Create the runner
			mnist_runner_path = os.path.join(this_dir,
				'novelty_detection_slurm.py')
			command = 'python "{0}" "{1}" "{2}" "{3}"'.format(
				mnist_runner_path, dir, noise, overlap)
			runner_path = os.path.join(dir, 'runner.sh')
			job_name = str(i)
			stdio_path = os.path.join(dir, 'stdio.txt')
			stderr_path = os.path.join(dir, 'stderr.txt')
			create_runner(command=command, runner_path=runner_path,
				job_name=job_name, partition_name=partition_name,
				stdio_path=stdio_path, stderr_path=stderr_path,
				time_limit='00-00:10:00', memory_limit=128)
			
			# Execute the runner
			execute_runner(runner_path)
			
			i += 1
Example 6
def launch_missing(missing, command, partition_name='debug',
	time_limit='00-04:00:00', memory_limit=512):
	"""
	Launch the missing results on the cluster. It assumes that the directories
	follow the <run_instance>-<fold_instance> naming convention.
	
	@param missing: The missing items.
	
	@param command: The base command to execute in the runner. Two additional
	arguments will be passed - the base directory and the fold index.
	
	@param partition_name: The partition name to use.
	
	@param time_limit: The maximum time limit.
	
	@param memory_limit: The maximum memory requirements in MB.
	"""
	
	# Execute each missing item
	for p in missing:
		# Build the SP kargs for the proper path
		bn, ix = os.path.basename(p).split('-')
		bp = os.path.join(os.path.dirname(p), bn)
		with open(os.path.join(bp, 'config.json'), 'rb') as f:
			kargs = json.load(f)
		kargs['log_dir'] = p
		
		# Dump the arguments to a new file
		s = json.dumps(kargs, sort_keys=True, indent=4,
			separators=(',', ': ')).replace('},', '},\n')
		with open(os.path.join(bp, 'config-{0}.json'.format(ix)), 'wb') as f:
			f.write(s)
		
		# Create the runner
		runner_path = os.path.join(bp, 'runner-{0}.sh'.format(ix))
		job_name = os.path.basename(p)
		command_new = '{0} "{1}" {2}'.format(command, bp, ix)
		stdio_path = os.path.join(bp, 'stdio-{0}.txt'.format(ix))
		stderr_path = os.path.join(bp, 'stderr-{0}.txt'.format(ix))
		create_runner(command=command_new, runner_path=runner_path,
			job_name=job_name, partition_name=partition_name,
			stdio_path=stdio_path, stderr_path=stderr_path,
			time_limit=time_limit, memory_limit=memory_limit)
		
		# Execute the runner
		execute_runner(runner_path)
Example 7
def launch_top_runs(top_paths, bp, command, auto_pupdate=False,
	partition_name='debug', time_limit='04-00:00:00', memory_limit=2048):
	"""
	Launch the top runs.
	
	@param top_paths: The full paths to the directories containing the top
	results.
	
	@param bp: The new base directory.
	
	@param command: The base command to execute in the runner. Three additional
	arguments will be passed: the original results directory, the new run
	directory, and the auto_pupdate flag.
	
	@param auto_pupdate: If True the permanence increment and decrement amounts
	will automatically be computed by the runner. If False, the ones specified
	in the config file will be used.
	
	@param partition_name: The partition name to use.
	
	@param time_limit: The maximum time limit.
	
	@param memory_limit: The maximum memory requirements in MB.
	"""
	
	for p in top_paths:
		# Path where the run should occur
		job_name = os.path.basename(p)
		p2 = os.path.join(bp, job_name)
		try:
			os.makedirs(p2)
		except OSError:
			pass # Overwrite the files
		
		# Create the runner
		runner_path = os.path.join(p2, 'runner.sh')
		command_new = '{0} "{1}" "{2}" {3}'.format(command, p, p2,
			int(auto_pupdate))
		stdio_path = os.path.join(p2, 'stdio.txt')
		stderr_path = os.path.join(p2, 'stderr.txt')
		create_runner(command=command_new, runner_path=runner_path,
			job_name=job_name, partition_name=partition_name,
			stdio_path=stdio_path, stderr_path=stderr_path,
			time_limit=time_limit, memory_limit=memory_limit)
		
		# Execute the runner
		execute_runner(runner_path)
Example 8
def main_slurm(log_dir,
               ntrain=800,
               ntest=200,
               niter=5,
               nsplits=3,
               global_inhibition=True,
               partition_name='debug',
               seed=None):
    """
	Perform CV on a subset of the MNIST dataset, using SLURM. Iterations will
	be run fully in parallel. Splits within an iteration will be run
	sequentially.
	
	@param log_dir: The directory to store the results in.
	
	@param ntrain: The number of training samples to use.
	
	@param ntest: The number of testing samples to use.
	
	@param niter: The number of parameter iterations to use.
	
	@param nsplits: The number of splits of the data to use.
	
	@param global_inhibition: If True use global inhibition; otherwise, use
	local inhibition.
	
	@param partition_name: The partition name of the cluster to use.
	
	@param seed: The seed for the random number generators.
	"""

    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
                                   global_inhibition, seed)

    # Create the runs
    for i in xrange(1, niter + 1):
        # Build the initial params
        param = {k: v.rvs() for k, v in sorted(params.items())}

        # Create the base directory
        dir = param['log_dir']
        splits = os.path.basename(dir).split('-')
        dir = os.path.join(os.path.dirname(dir),
                           '-'.join(s for s in splits[:-1]))
        try:
            os.makedirs(dir)
        except OSError:
            pass

        # Dump the CV data
        with open(os.path.join(dir, 'cv.pkl'), 'wb') as f:
            cPickle.dump(list(cv), f, cPickle.HIGHEST_PROTOCOL)

        # Build the full params
        for k, v in kargs.items():
            if k != 'clf':  # Add the classifier later
                param[k] = v

        # Dump the params as JSON
        s = json.dumps(param, sort_keys=True, indent=4,
                       separators=(',', ': ')).replace('},', '},\n')
        with open(os.path.join(dir, 'config.json'), 'wb') as f:
            f.write(s)

        # Create the runner
        mnist_runner_path = os.path.join(
            pkgutil.get_loader('mHTM.examples').filename, 'mnist_runner.py')
        command = 'python "{0}" "{1}"'.format(mnist_runner_path, dir)
        runner_path = os.path.join(dir, 'runner.sh')
        job_name = str(i)
        stdio_path = os.path.join(dir, 'stdio.txt')
        stderr_path = os.path.join(dir, 'stderr.txt')
        create_runner(command=command,
                      runner_path=runner_path,
                      job_name=job_name,
                      partition_name=partition_name,
                      stdio_path=stdio_path,
                      stderr_path=stderr_path)

        # Execute the runner
        execute_runner(runner_path)
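The loop above draws one value per parameter via rvs(), so main is expected to return params whose values are distribution-like objects (even log_dir is drawn this way, which suggests a per-iteration path sampler inside mHTM). A hypothetical illustration of that contract using scipy.stats, with made-up parameter names and ranges:

from scipy.stats import randint, uniform

# Made-up search space; every value exposes .rvs(), as the loop requires.
example_params = {
    'nsynapses': randint(low=25, high=100),
    'pct_active': uniform(loc=0.2, scale=0.3),
    'perm_inc': uniform(loc=0.01, scale=0.1),
}
draw = {k: v.rvs() for k, v in sorted(example_params.items())}
print(draw)  # one random configuration, e.g. {'nsynapses': 61, ...}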
Example 9
def run_experiment(experiments,
                   base_dir,
                   nsamples=500,
                   nbits=100,
                   pct_active=0.4,
                   pct_noise=0.15,
                   seed=123456789,
                   ntrials=10,
                   partition_name='debug',
                   this_dir=os.getcwd()):
    """Run an experiment for the SP.

    This experiment is used to vary various sets of parameters on the SP
    dataset. This function uses SLURM to conduct the experiments.

    @param experiments: A list containing the experiment details. Refer to one
    of the examples in this module for more details.

    @param base_dir: The base directory to use for logging.

    @param nsamples: The number of samples to add to the dataset.

    @param nbits: The number of bits each sample should have.

    @param pct_active: The percentage of bits that will be active in the base
    class SDR.

    @param pct_noise: The percentage of noise to add to the data.

    @param seed: The seed used to initialize the random number generator.

    @param ntrials: The number of parameter trials to use. Each iteration will
    be used to initialize the SP in a different manner.

    @param partition_name: The partition name of the cluster to use.

    @param this_dir: The full path to the directory where this file is located.
    """
    # Create the dataset
    data = SPDataset(nsamples, nbits, pct_active, pct_noise, seed).data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Prep each experiment for execution
    for experiment_name, time_limit, memory_limit, params in experiments:
        # Iterate through each inhibition type
        for i, global_inhibition in enumerate((True, False)):
            # Get base configuration
            base_config = create_base_config(base_dir, experiment_name,
                                             global_inhibition)

            # Add the parameters
            for param_name, param_value in params:
                base_config[param_name] = param_value
                config_gen = ConfigGenerator(base_config, ntrials)

            # Make the configurations
            for config in config_gen.get_config():
                # Make the base directory
                dir = config['log_dir']
                splits = os.path.basename(dir).split('-')
                base_name = '-'.join(s for s in splits[:-1])
                dir = os.path.join(os.path.dirname(dir), base_name)
                try:
                    os.makedirs(dir)
                except OSError:
                    pass

                # Dump the config as JSON
                s = json.dumps(config,
                               sort_keys=True,
                               indent=4,
                               separators=(',', ': ')).replace('},', '},\n')
                with open(os.path.join(dir, 'config.json'), 'w') as f:
                    f.write(s)

                # Dump the dataset and the metrics
                with open(os.path.join(dir, 'dataset.pkl'), 'wb') as f:
                    pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
                    pickle.dump(
                        (uniqueness_data, overlap_data, correlation_data), f,
                        pickle.HIGHEST_PROTOCOL)

                # Create the runner
                this_path = os.path.join(this_dir, 'parameter_exploration.py')
                command = 'python "{0}" "{1}" {2} {3}'.format(
                    this_path, dir, ntrials, seed)
                runner_path = os.path.join(dir, 'runner.sh')
                job_name = '{0}_{1}{2}'.format(
                    experiment_name, 'G' if global_inhibition else 'L',
                    base_name)
                stdio_path = os.path.join(dir, 'stdio.txt')
                stderr_path = os.path.join(dir, 'stderr.txt')
                create_runner(command=command,
                              runner_path=runner_path,
                              job_name=job_name,
                              partition_name=partition_name,
                              stdio_path=stdio_path,
                              stderr_path=stderr_path,
                              time_limit=time_limit[i],
                              memory_limit=memory_limit)

                # Execute the runner
                execute_runner(runner_path)
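The unpacking for experiment_name, time_limit, memory_limit, params, together with the time_limit[i] lookup, implies that each entry in experiments carries a name, a pair of time limits (global inhibition first, then local), a memory limit in MB, and a list of (parameter, value) overrides. A hypothetical entry, with placeholder names and values:

# Hypothetical experiment spec; names, limits and values are placeholders.
experiments = [
    ('nsynapses_sweep',                # experiment_name
     ('00-04:00:00', '01-00:00:00'),   # time_limit: (global, local) inhibition
     512,                              # memory_limit in MB
     [('nsynapses', 40)]),             # params: (name, value) overrides
]
run_experiment(experiments, base_dir='/scratch/mhtm/sp_sweeps')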
Example 10
def main_slurm(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
	global_inhibition=True, partition_name='debug', seed=None):
	"""
	Perform CV on a subset of the MNIST dataset, using SLURM. Iterations will
	be run fully in parallel. Splits within an iteration will be run
	sequentially.
	
	@param log_dir: The directory to store the results in.
	
	@param ntrain: The number of training samples to use.
	
	@param ntest: The number of testing samples to use.
	
	@param niter: The number of parameter iterations to use.
	
	@param nsplits: The number of splits of the data to use.
	
	@param global_inhibition: If True use global inhibition; otherwise, use
	local inhibition.
	
	@param partition_name: The partition name of the cluster to use.
	
	@param seed: The seed for the random number generators.
	"""
	
	# Run the initialization
	x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
		global_inhibition, seed)
	
	# Create the runs
	for i in xrange(1, niter + 1):
		# Build the initial params
		param = {k:v.rvs() for k, v in sorted(params.items())}
		
		# Create the base directory
		dir = param['log_dir']
		splits = os.path.basename(dir).split('-')
		dir = os.path.join(os.path.dirname(dir),
			'-'.join(s for s in splits[:-1]))
		try:
			os.makedirs(dir)
		except OSError:
			pass
		
		# Dump the CV data
		with open(os.path.join(dir, 'cv.pkl'), 'wb') as f:
			cPickle.dump(list(cv), f, cPickle.HIGHEST_PROTOCOL)
		
		# Build the full params
		for k, v in kargs.items():
			if k != 'clf': # Add the classifier later
				param[k] = v
		
		# Dump the params as JSON
		s = json.dumps(param, sort_keys=True, indent=4,
			separators=(',', ': ')).replace('},', '},\n')
		with open(os.path.join(dir, 'config.json'), 'wb') as f:
			f.write(s)
		
		# Create the runner
		mnist_runner_path = os.path.join(pkgutil.get_loader('mHTM.examples').
			filename, 'mnist_runner.py')
		command = 'python "{0}" "{1}"'.format(mnist_runner_path, dir)
		runner_path = os.path.join(dir, 'runner.sh')
		job_name = str(i)
		stdio_path = os.path.join(dir, 'stdio.txt')
		stderr_path = os.path.join(dir, 'stderr.txt')
		create_runner(command=command, runner_path=runner_path,
			job_name=job_name, partition_name=partition_name,
			stdio_path=stdio_path, stderr_path=stderr_path)
		
		# Execute the runner
		execute_runner(runner_path)
Example 11
def run_experiment(experiments, base_dir, nsamples=500, nbits=100,
	pct_active=0.4, pct_noise=0.15, seed=123456789, ntrials=10,
	partition_name='debug', this_dir=os.getcwd()):
	"""
	Run an experiment for the SP. This experiment varies different sets of
	parameters on the SP dataset. This function uses SLURM to conduct the
	experiments.
	
	@param experiments: A list containing the experiment details. Refer to one
	of the examples in this module for more details.
	
	@param base_dir: The base directory to use for logging.
	
	@param nsamples: The number of samples to add to the dataset.
	
	@param nbits: The number of bits each sample should have.
	
	@param pct_active: The percentage of bits that will be active in the base
	class SDR.
	
	@param pct_noise: The percentage of noise to add to the data.
	
	@param seed: The seed used to initialize the random number generator.
	
	@param ntrials: The number of parameter trials to use. Each iteration will
	be used to initialize the SP in a different manner.
	
	@param partition_name: The partition name of the cluster to use.
	
	@param this_dir: The full path to the directory where this file is located.
	"""
	
	# Create the dataset
	data = SPDataset(nsamples, nbits, pct_active, pct_noise, seed).data
	
	# Metrics
	metrics = SPMetrics()
	
	# Get the metrics for the dataset
	uniqueness_data = metrics.compute_uniqueness(data)
	overlap_data = metrics.compute_overlap(data)
	correlation_data = 1 - metrics.compute_distance(data)
	
	# Prep each experiment for execution
	for experiment_name, time_limit, memory_limit, params in experiments:
		# Iterate through each inhibition type
		for i, global_inhibition in enumerate((True, False)):
			# Get base configuration
			base_config = create_base_config(base_dir, experiment_name,
				global_inhibition)
			
			# Add the parameters
			for param_name, param_value in params:
				base_config[param_name] = param_value
				config_gen = ConfigGenerator(base_config, ntrials)
			
			# Make the configurations
			for config in config_gen.get_config():
				# Make the base directory
				dir = config['log_dir']
				splits = os.path.basename(dir).split('-')
				base_name = '-'.join(s for s in splits[:-1])
				dir = os.path.join(os.path.dirname(dir), base_name)
				try:
					os.makedirs(dir)
				except OSError:
					pass
				
				# Dump the config as JSON
				s = json.dumps(config, sort_keys=True, indent=4,
					separators=(',', ': ')).replace('},', '},\n')
				with open(os.path.join(dir, 'config.json'), 'wb') as f:
					f.write(s)
				
				# Dump the dataset and the metrics
				with open(os.path.join(dir, 'dataset.pkl'), 'wb') as f:
					cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)
					cPickle.dump((uniqueness_data, overlap_data,
						correlation_data), f, cPickle.HIGHEST_PROTOCOL)
				
				# Create the runner
				this_path = os.path.join(this_dir, 'parameter_exploration.py')
				command = 'python "{0}" "{1}" {2} {3}'.format(this_path, dir,
					ntrials, seed)
				runner_path = os.path.join(dir, 'runner.sh')
				job_name = '{0}_{1}{2}'.format(experiment_name, 'G' if
					global_inhibition else 'L', base_name)
				stdio_path = os.path.join(dir, 'stdio.txt')
				stderr_path = os.path.join(dir, 'stderr.txt')
				create_runner(command=command, runner_path=runner_path,
					job_name=job_name, partition_name=partition_name,
					stdio_path=stdio_path, stderr_path=stderr_path,
					time_limit=time_limit[i], memory_limit=memory_limit)
				
				# Execute the runner
				execute_runner(runner_path)