Example #1
	def go(kargs, type):
		"""
		Execute SP.
		
		@param kargs: The params for the SP.
		
		@param type: Run type.
		
		@return: The new sp and the execution time.
		"""
		
		# Run the SP
		t = time.time()
		for _ in xrange(n_iters):
			sp = SPRegion(**kargs)
			sp.c_sboost = 0 # Ensure that no permanence boosting occurs
			sp.execute(ds, store=False)
		t = time.time() - t
		
		# Dump the permanence matrix
		with open(os.path.join(p, '{0}-permanence.pkl'.format(type)), 'wb') \
			as f:
			cPickle.dump(sp.p, f, cPickle.HIGHEST_PROTOCOL)
		
		# Dump the details
		kargs['density'] = density
		kargs['seed'] = seed
		kargs['time'] = t
		with open(os.path.join(p, '{0}-details.json'.format(type)), 'wb') as f:
			f.write(json.dumps(kargs, sort_keys=True, indent=4,
				separators=(',', ': ')))
		
		return sp, t
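
`go` closes over several names from its enclosing scope (`n_iters`, `ds`, `p`, `density`, `seed`). A minimal sketch of that scope, assuming mHTM import paths and placeholder values that are not part of the original experiment:

import os
import time
import json
import cPickle

from mHTM.region import SPRegion            # assumed import path
from mHTM.datasets.loader import SPDataset  # assumed import path

p = os.path.join('results', 'boost_experiments')  # output directory (assumed)
n_iters, seed, density = 10, 123456789, 0.4       # assumed values
ds = SPDataset(nsamples=500, nbits=100, pct_active=density, pct_noise=0.15,
    seed=seed).data                               # data that go() executes on

# With go() defined inside this scope as above, one run might look like:
kargs = {'ninputs': 100, 'ncolumns': 200, 'nactive': 50, 'seed': seed}
sp, t = go(kargs, 'no_boost')  # writes no_boost-permanence.pkl / -details.json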
Example #2
def main():

    print "Early fused binary SDRs processing"
    hl.tic()
    # Parameters to construct cortical structure
    nbits, pct_active, nobjs = 2048, 0.4, 51
    nofcols = 2048

    # Binary path
    bin_path_efussion = '/home/neurobot/Datasets/mmodal_washington_io/efusion/'
    befusion_header, sefusion_header = 'befusion', 'sefusion'

    # Spool path
    spool_efus = '/home/neurobot/Datasets/spool_washington_io/mm_early_spool/'

    kargs = {
        'ninputs': nbits,
        'ncolumns': nofcols,
        'nactive': int(nbits * 0.2),
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 100,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10
    }

    sp = SPRegion(**kargs)

    # Change the path according to modality type
    data_path, data_header = bin_path_efussion, befusion_header
    sdr_path, sdr_header = spool_efus, sefusion_header

    for j in range(nobjs):
        obj_str = str(j + 1)
        obj_path = data_path + obj_str + '.mat'
        data_content = hl.extract_mat_content(obj_path, data_header)
        num_of_imgs, length = data_content.shape
        sdrs = np.zeros((num_of_imgs, nbits), dtype=np.int64)
        for i in range(num_of_imgs):
            sp.fit(data_content[i])
            sp_output = sp.predict(data_content[i])
            outp = sp_output * 1
            if np.count_nonzero(outp) != int(nbits * 0.2):
                print j + 1, i, np.count_nonzero(outp)
            sdrs[i, :] = outp
        sdr_mat = sdr_path + str(j + 1) + '.mat'
        sio.savemat(sdr_mat, mdict={sdr_header: sdrs})
        print "Spooling for object ", j + 1, " tooks"
        hl.tac()
        print "----------------------------------"
    print "Finished spooling for all objects"
    hl.tac()
Example #3
def second_level(log_dir, seed=123456789):
    # Get the paths to the data
    paths = []
    for d in os.listdir(log_dir):
        p = os.path.join(log_dir, d)
        if os.path.isdir(p): paths.append(os.path.join(p, 'predictions.pkl'))
    paths = sorted(paths)[:16]

    # Read in the first item, to determine the shape of the data
    tr, te = SPRegion.load_data(paths[0])
    ntrain, ntest = len(tr), len(te)
    n_base_cols = tr.shape[-1]
    ninputs = n_base_cols * len(paths)

    # Read in all of the data
    tr_x = np.zeros((ntrain, ninputs), dtype='bool')
    te_x = np.zeros((ntest, ninputs), dtype='bool')
    for i, p in enumerate(paths):
        tr, te = SPRegion.load_data(p)
        tr_x[:, i * n_base_cols:(i + 1) * n_base_cols] = tr
        te_x[:, i * n_base_cols:(i + 1) * n_base_cols] = te

    # Read in the labels
    tr_y, te_y = SPRegion.load_data(os.path.join(log_dir, 'labels.pkl'))

    # SP arguments
    ncolumns = 4096
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ncolumns,
        'nactive': int(ncolumns * 0.2),
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 100,
        'seg_th': 0,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(log_dir, '2-1'),
        'clf': LinearSVC(random_state=seed)
    }

    # Create the SP
    sp = SPRegion(**kargs)

    # Train the SP
    sp.fit(tr_x, tr_y)

    # Score the SP
    print sp.score(te_x, te_y)
Example #4
def _main3(params, x):
    """Use by main3 to do the SP training in parallel.

    @param params: The configuration parameters for the SP.

    @param x: The data to train the SP on.

    @return: The SP instance, as well as its predictions on the training data.
    """
    clf = SPRegion(**params)
    clf.fit(x)
    y = np.mean(clf.predict(x), 0)
    # Threshold the averaged SDR: bits active in at least half of the
    # samples become 1, everything else becomes 0
    y[y >= 0.5] = 1
    y[y < 1] = 0

    return clf, y
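
_main3 is written to be mapped over a process pool; a sketch of how main3 might fan it out with joblib (which Example #14 below uses), where `params` and the list `xs` of boolean input matrices are assumptions:

import numpy as np
from joblib import Parallel, delayed

# Hypothetical driver: train one SP per dataset in parallel.
results = Parallel(n_jobs=-1)(delayed(_main3)(params, x) for x in xs)
clfs = [clf for clf, _ in results]             # the fitted SPRegion instances
base_sdrs = np.array([y for _, y in results])  # thresholded mean predictions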
Example #5
def main(ntrain=800, ntest=200, nsplits=1, seed=1234567):
    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 10,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'disable_boost': True,
        'nsynapses': 392,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,
        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for i, (tr, te) in enumerate(cv):
        # Create the region
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])

    # Get a random set of unique inputs from the training set
    inputs = np.zeros((10, ninputs))
    for i in xrange(10):
        ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
        inputs[i] = x[tr][ix]

    # Get the SP's predictions for the inputs
    sp_pred = sp.predict(inputs)

    # Get the reconstruction in the context of the SP
    sp_inputs = sp.reconstruct_input(sp_pred)

    # Make a plot comparing the images
    shape = (28, 28)
    path = os.path.join(sp.log_dir, 'input_reconstruction.png')
    plot_compare_images((inputs, sp_pred, sp_inputs), shape, out_path=path)
Example #6
def one_cv(base_dir, cv_split):
	"""
	Run the MNIST experiment. Only the specified CV split is executed.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config as well as the pickled data.
	
	@param cv_split: The index for the CV split.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config-{0}.json'.format(cv_split)),
		'rb') as f:
		kargs = json.load(f)
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Get the CV splits
	with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
		cv = cPickle.load(f)
	tr, te = cv[cv_split - 1]
	
	# Remove the split directory, if it exists
	shutil.rmtree(os.path.join(base_dir, str(cv_split)), True)
	
	# Execute
	clf = SPRegion(**kargs)
	clf.fit(x[tr], y[tr])
	
	# Column accuracy
	clf.score(x[te], y[te])
	
	# Probabilistic accuracy
	clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')
	
	# Dimensionality reduction method
	clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
	ndims = len(clf.reduce_dimensions(x[0]))
	clf._log_stats('Number of New Dimensions', ndims)
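
Since one_cv handles exactly one split, fanning out the splits is straightforward; a sketch using joblib (which Example #14 below uses), where `base_dir` and `nsplits` are assumptions:

from joblib import Parallel, delayed

# Hypothetical parallel dispatch of all CV splits.
nsplits = 8
Parallel(n_jobs=-1)(delayed(one_cv)(base_dir, i)
    for i in xrange(1, nsplits + 1))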
Example #7
def full_mnist(base_dir, new_dir, auto_update=False):
	"""
	Execute a full MNIST run using the parameters specified by ix.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config.
	
	@param new_dir: The full path of where the data should be saved.
	
	@param auto_update: If True the permanence increment and decrement amounts
	will automatically be computed by the runner. If False, the ones specified
	in the config file will be used.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		kargs = json.load(f)
	kargs['log_dir'] = new_dir
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()

	# Manually compute the permanence update amounts
	if auto_update:
		# Compute the number of active bits in each training instance
		avg_s = tr_x.sum(1)
		
		# Compute the total average sum
		avg_ts = avg_s.mean()
		
		# Compute the average active probability
		a_p = avg_ts / float(tr_x.shape[1])
		
		# Compute the scaling factor
		scaling_factor = 1 / avg_ts
		
		# Compute the update amounts
		pinc = scaling_factor * (1 / a_p)
		pdec = scaling_factor * (1 / (1 - a_p))
		
		# Update the config
		kargs['pinc'], kargs['pdec'] = pinc, pdec
	
	# Execute
	clf = SPRegion(**kargs)
	clf.fit(tr_x, tr_y)
	
	# Column accuracy
	clf.score(te_x, te_y)
	
	# Probabilistic accuracy
	clf.score(te_x, te_y, tr_x=tr_x, score_method='prob')
	
	# Dimensionality reduction method
	clf.score(te_x, te_y, tr_x=tr_x, score_method='reduction')
	ndims = len(clf.reduce_dimensions(tr_x[0]))
	clf._log_stats('Number of New Dimensions', ndims)
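
In the auto_update branch, pinc = (1 / avg_ts) * (1 / a_p) and pdec = (1 / avg_ts) * (1 / (1 - a_p)), so both updates shrink as the average number of active bits grows. A toy numeric check of that arithmetic on made-up data (not MNIST):

import numpy as np

tr_x = np.array([[1, 0, 1, 0], [1, 1, 0, 0]], dtype='bool')
avg_ts = tr_x.sum(1).mean()              # 2.0 active bits per instance
a_p = avg_ts / float(tr_x.shape[1])      # 2.0 / 4 = 0.5
scaling_factor = 1 / avg_ts              # 0.5
pinc = scaling_factor * (1 / a_p)        # 0.5 * 2.0 = 1.0
pdec = scaling_factor * (1 / (1 - a_p))  # 0.5 * 2.0 = 1.0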
Example #8
def main_local(log_dir,
               ntrain=800,
               ntest=200,
               niter=5,
               nsplits=3,
               global_inhibition=True,
               ncores=4,
               seed=None):
    """
	Perform CV on a subset of the MNIST dataset. Performs parallelizations on
	a local machine.
	
	@param log_dir: The directory to store the results in.
	
	@param ntrain: The number of training samples to use.
	
	@param ntest: The number of testing samples to use.
	
	@param niter: The number of parameter iterations to use.
	
	@param nsplits: The number of splits of the data to use.
	
	@param global_inhibition: If True use global inhibition; otherwise, use
	local inhibition.
	
	@param ncores: The number of cores to use.
	
	@param seed: The seed for the random number generators.
	"""

    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
                                   seed)

    # Build the classifier for doing CV
    clf = RandomizedSearchCV(
        estimator=SPRegion(**kargs),
        param_distributions=params,
        n_iter=niter,  # Total runs
        n_jobs=ncores,  # Use this many cores
        pre_dispatch=1 * ncores,  # Dispatch one job per core at a time
        iid=True,  # Data is iid across folds
        cv=cv,  # The CV split for the data
        refit=False,  # Disable fitting best estimator on full dataset
        random_state=seed  # Force same SP across runs
    )

    # Fit the models
    clf.fit(x, y)

    # Extract the CV results
    parameter_names = sorted(clf.grid_scores_[0].parameters.keys())
    parameter_names.pop(parameter_names.index('log_dir'))
    parameter_values = np.zeros((niter, len(parameter_names)))
    results = np.zeros((niter, nsplits))
    for i, score in enumerate(clf.grid_scores_):
        parameter_values[i] = np.array(
            [score.parameters[k] for k in parameter_names])
        results[i] = score.cv_validation_scores

    # Save the CV results
    with open(os.path.join(log_dir, 'cv_results.pkl'), 'wb') as f:
        cPickle.dump((parameter_names, parameter_values, results), f,
                     cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join(log_dir, 'cv_clf.pkl'), 'wb') as f:
        cPickle.dump((clf.grid_scores_, clf.best_score_, clf.best_params_), f,
                     cPickle.HIGHEST_PROTOCOL)
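
main_local targets an older scikit-learn API (grid_scores_ and iid both predate the cv_results_ interface introduced in 0.18). RandomizedSearchCV samples each entry of param_distributions niter times, so the `params` returned by main presumably maps SP keyword names to distributions or lists; a sketch in which every range is an assumption (`log_dir` as in the function's argument):

from scipy.stats import randint, uniform

params = {
    'nsynapses': randint(25, 200),  # integers sampled from [25, 200)
    'seg_th': randint(0, 20),
    'pinc': uniform(0.001, 0.05),   # floats sampled from [0.001, 0.051)
    'pdec': uniform(0.001, 0.05),
    'log_dir': [log_dir],           # fixed; popped from the results above
}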
Example #9
def full_cv(base_dir):
	"""
	Run the MNIST experiment. Each CV split is executed sequentially.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config as well as the pickled data.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		kargs = json.load(f)
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Get the CV splits
	with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
		cv = cPickle.load(f)
	
	# Execute each run
	for tr, te in cv:
		clf = SPRegion(**kargs)
		clf.fit(x[tr], y[tr])
		
		# Column accuracy
		clf.score(x[te], y[te])
		
		# Probabilistic accuracy
		clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')
		
		# Dimensionality reduction method
		clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
		ndims = len(clf.reduce_dimensions(x[0]))
		clf._log_stats('Number of New Dimensions', ndims)
Example #10
def base_experiment(log_dir, seed=123456789):
    """
	The base experiment.
	
	Build an SP using SPDataset and see how it performs.
	
	@param log_dir: The full path to the log directory.
	
	@param seed: The random seed to use.
	
	@return: Tuple containing: SP uniqueness, input uniqueness, SP overlap,
	input overlap.
	"""

    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Build items to store results
    npoints = 11
    pct_noises = np.linspace(0, 1, npoints)
    u_sp, u_ip = np.zeros(npoints), np.zeros(npoints)
    o_sp, o_ip = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        # Build the dataset
        ds = SPDataset(nsamples=nsamples,
                       nbits=nbits,
                       pct_active=pct_active,
                       pct_noise=pct_noise,
                       seed=seed)
        x = ds.data

        # Get the dataset stats
        u_ip[i] = metrics.compute_uniqueness(x) * 100
        o_ip[i] = metrics.compute_overlap(x) * 100

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x)

        # Get the SP's output SDRs
        sp_output = sp.predict(x)

        # Get the stats
        u_sp[i] = metrics.compute_uniqueness(sp_output) * 100
        o_sp[i] = (metrics.compute_overlap(sp_output) +
                   metrics.compute_overlap(np.logical_not(sp_output))) * 50

        # Log everything
        sp._log_stats('% Input Uniqueness', u_ip[i])
        sp._log_stats('% Input Overlap', o_ip[i])
        sp._log_stats('% SP Uniqueness', u_sp[i])
        sp._log_stats('% SP Overlap', o_sp[i])

    return u_sp, u_ip, o_sp, o_ip
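
The four returned arrays pair directly with plot_line as used in Example #20; a sketch of plotting the uniqueness sweep, where the log and output paths are assumptions and plot_line's keyword arguments are copied from that example:

import os
import numpy as np
from mHTM.plot import plot_line  # assumed import path

u_sp, u_ip, o_sp, o_ip = base_experiment(os.path.join('results', 'base'))
pct_noises = np.linspace(0, 1, 11) * 100
plot_line([pct_noises, pct_noises], [u_ip, u_sp],
    series_names=('Raw Data', 'SP Output'), x_label='% Noise',
    y_label='Uniqueness [%]', xlim=False, ylim=(-5, 105),
    out_path=os.path.join('results', 'base', 'uniqueness.png'), show=True)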
Example #11
def run_single_experiment(base_dir, ntrials=10, seed=123456789):
    """Run the actual experiment.

    @param base_dir: The directory to containing the experiment to be run.

    @param ntrials: The number of trials to perform with different seeds.

    @param seed: The initial seed used to generate the other random seeds.
    """
    # Generate the number of requested seeds
    seeds = generate_seeds(ntrials, seed)

    # Get the configuration
    with open(os.path.join(base_dir, 'config.json'), 'r') as f:
        config = json.load(f)

    # Get the data and base metric data
    with open(os.path.join(base_dir, 'dataset.pkl'), 'rb') as f:
        data = pickle.load(f)
        uniqueness_data, overlap_data, correlation_data = pickle.load(f)

    # Metrics
    metrics = SPMetrics()

    # Execute each run
    for s in seeds:
        # Update the seed
        config['seed'] = s

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(data)

        # Get the SP's output
        sp_output = sp.predict(data)

        # Log all of the metrics
        sp._log_stats('Input Uniqueness', uniqueness_data)
        sp._log_stats('Input Overlap', overlap_data)
        sp._log_stats('Input Correlation', correlation_data)
        sp._log_stats('SP Uniqueness', metrics.compute_uniqueness(sp_output))
        sp._log_stats('SP Overlap', metrics.compute_overlap(sp_output))
        sp._log_stats('SP Correlation',
                      1 - metrics.compute_distance(sp_output))
Example #12
def base_experiment(pct_noise=0.15,
                    noverlap_bits=0,
                    exp_name='1-1',
                    ntrials=10,
                    verbose=True,
                    seed=123456789):
    """
	Run a single experiment, locally.
	
	@param pct_noise: The percentage of noise to add to the dataset.
	
	@param noverlap_bits: The number of bits the base class should overlap
	with the novelty class.
	
	@param exp_name: The name of the experiment.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param verbose: If True print the results.
	
	@param seed: The random seed to use.
	
	@return: A tuple containing the percentage errors for the SP's training
	and testing results and the SVM's training and testing results,
	respectively.
	"""

    # Base parameters
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5
    log_dir = os.path.join(os.path.expanduser('~'), 'scratch',
                           'novelty_experiments', exp_name)

    # Configure the SP
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Create the base dataset
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Create the outlier dataset
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [x for x in xrange(nbits) if x not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)] = 1
    outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        print "\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.)
        print "Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.)
        print 'Overlap between two classes: {0}'.format(
            np.dot(x_ds.base_class.astype('i'), outlier_base.astype('i')))

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    c_x_tr = 1 - metrics.compute_distance(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    c_x_te = 1 - metrics.compute_distance(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)
    c_y_te = 1 - metrics.compute_distance(y_te)

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Iterate across the trials:
    for i in xrange(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2
        config['log_dir'] = '{0}-{1}'.format(log_dir, i + 1)

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)
        c_sp_y_te = 1 - metrics.compute_distance(sp_y_te)

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Train Correlation', c_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Base Class Test Correlation', c_x_te)
        sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
        sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
        sp._log_stats('Input Novelty Class Test Correlation', c_y_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
        sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
        sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)
        sp._log_stats('SP Novelty Class Test Correlation', c_sp_y_te)

        # Print the results
        fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'
        if verbose:
            print '\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te'
            print fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr,
                               u_sp_x_te, u_sp_y_te)
            print fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr,
                               o_sp_x_te, o_sp_y_te)
            print fmt_s.format('Correlation', c_x_tr, c_x_te, c_y_te,
                               c_sp_x_tr, c_sp_x_te, c_sp_y_te)

        # Get average representation of the base class
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        c_sp_base_to_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the sums
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            o_sp_base_to_y_te += metrics.compute_overlap(yt)
            c_sp_base_to_y_te += 1 - metrics.compute_distance(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest
        c_sp_base_to_y_te /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Correlation', c_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Correlation',
                      c_sp_base_to_y_te)

        # Print the results
        if verbose:
            print '\nDescription\tx_tr->x_te\tx_tr->y_te'
            print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te)
            print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te)
            print 'Correlation:\t{0:2.4f}\t{1:2.4f}'.format(
                c_sp_base_to_x_te, c_sp_base_to_y_te)

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \
         100
        svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * \
         100

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the accuracy
            xo = metrics.compute_overlap(xt)
            yo = metrics.compute_overlap(yt)
            if xo >= clf_th: clf_x_te += 1
            if yo < clf_th: clf_y_te += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[i] = 100 - clf_x_te
        sp_y_results[i] = 100 - clf_y_te
        svm_x_results[i] = 100 - svm_x_te
        svm_y_results[i] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        # Print the results
        if verbose:
            print '\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te)
            print 'SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te)
            print 'SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te)
            print 'SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te)

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
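
A sketch of a driver that sweeps the base/novelty overlap and summarizes the per-trial error arrays base_experiment returns; the sweep values are assumptions:

for noverlap_bits in (0, 10, 20, 30, 40):
    sp_x, sp_y, svm_x, svm_y = base_experiment(
        noverlap_bits=noverlap_bits,
        exp_name='overlap-{0}'.format(noverlap_bits),
        verbose=False)
    print 'overlap={0}: SP error {1:2.2f}% / {2:2.2f}%'.format(
        noverlap_bits, sp_x.mean(), sp_y.mean())
    print '             SVM error {0:2.2f}% / {1:2.2f}%'.format(
        svm_x.mean(), svm_y.mean())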
Example #13
def fit_grid():
	"""
	Use a grid technique with many SPs.
	"""
	
	p = 'results\\mnist_filter'
	# try:
		# os.makedirs(p)
	# except OSError:
		# pass
	np.random.seed(123456789)
	# kargs = {
		# 'ninputs': 9,
		# 'ncolumns': 100,
		# 'nsynapses': 5,
		# 'random_permanence': True,
		# 'pinc':0.03, 'pdec':0.05,
		# 'seg_th': 3,
		# 'nactive': 10,
		# 'duty_cycle': 100,
		# 'max_boost': 10,
		# 'global_inhibition': True,
		# 'trim': 1e-4
	# }
	kargs2 = {
		'ninputs': 100 * (26 ** 2),
		'ncolumns': 2048,
		'nsynapses': 1000,
		'random_permanence': True,
		'pinc':0.03, 'pdec':0.05,
		'seg_th': 5,
		'nactive': 20,
		'duty_cycle': 100,
		'max_boost': 10,
		'global_inhibition': True,
		'trim': 1e-4
	}
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = get_data()
	nwindows = 26 ** 2
	
	# # Make the SPs
	# sps = [SPRegion(**kargs) for _ in xrange(nwindows)]
	
	# # Train the SPs
	# nepochs = 10
	# t = time.time()
	# for i in xrange(nepochs):
		# print i
		# for j, x in enumerate(tr_x):
			# print '\t{0}'.format(j)
			# nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape(
				# nwindows, 9)
			# for xi, sp in izip(nx, sps):
				# sp.step(xi)
	# t1 = time.time() - t
	# print t1
	
	# # Save this batch of SPs
	# for i, sp in enumerate(sps):
		# sp.learn = False
		# sp.save(os.path.join(p, 'sp0-{0}.pkl'.format(i)))
	
	# Make the top level SP
	sp2 = SPRegion(**kargs2)
	
	# Get the SPs
	sps = [load(os.path.join(p, sp)) for sp in os.listdir(p) if sp[2] == '0']
	
	# Train the top SP
	nepochs = 10
	t = time.time()
	for i in xrange(nepochs):
		print i
		for j, x in enumerate(tr_x):
			print '\t{0}'.format(j)
			nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape(
				nwindows, 9)
			output = np.array(np.zeros(100 * nwindows), dtype='bool')
			for k, (xi, sp) in enumerate(izip(nx, sps)):
				sp.step(xi)
				output[k*100:(k*100)+100] = sp.y[:, 0]
			sp2.step(output)
	t2 = time.time() - t
	print t2
	
	# Save the top SP
	sp2.learn = False
	sp2.save(os.path.join(p, 'sp1-0.pkl'))
Example #14
def first_level(log_dir, ntrain=800, ntest=200, nsplits=1, seed=123456789):
    # Details of the filter
    win_size = 7
    total_win_size = win_size * win_size
    nwindows = 16

    # SP arguments
    kargs = {
        'ninputs': total_win_size,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 35,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(log_dir, '1-1')
    }

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    tr, te = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed).gen.next()
    tr, te = tr[:ntrain], te[:ntest]

    # Store the labels to disk
    with open(os.path.join(log_dir, 'labels.pkl'), 'wb') as f:
        cPickle.dump((y[tr], y[te]), f, cPickle.HIGHEST_PROTOCOL)
    del tr_y
    del te_y
    del y

    # Build the training data
    train_data = np.zeros((nwindows, ntrain, total_win_size), dtype='bool')
    for i in xrange(ntrain):
        xi = x[tr[i]]
        for j, window in enumerate(get_windows(xi.reshape(28, 28), win_size)):
            train_data[j, i] = window

    # Build the testing data
    test_data = np.zeros((nwindows, ntest, total_win_size), dtype='bool')
    for i in xrange(ntest):
        xi = x[te[i]]
        for j, window in enumerate(get_windows(xi.reshape(28, 28), win_size)):
            test_data[j, i] = window
    del tr_x
    del te_x
    del x

    # Make the SPs
    sps = [SPRegion(**kargs) for _ in xrange(nwindows)]

    # Execute the SPs in parallel
    Parallel(n_jobs=-1)(delayed(execute)(sp, tr, te)
                        for sp, tr, te in izip(sps, train_data, test_data))
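
execute is not defined in this snippet. Since second_level (Example #3) loads a predictions.pkl from each SP's log directory with SPRegion.load_data, a plausible sketch, offered purely as an assumption:

import os
import cPickle

def execute(sp, tr_x, te_x):
    """
    Hypothetical helper: fit one SP and persist its train/test predictions
    where second_level (Example #3) expects to find them.
    """
    sp.fit(tr_x)
    tr_pred, te_pred = sp.predict(tr_x), sp.predict(te_x)
    with open(os.path.join(sp.log_dir, 'predictions.pkl'), 'wb') as f:
        cPickle.dump((tr_pred, te_pred), f, cPickle.HIGHEST_PROTOCOL)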
Example #15
def fit_grid():
    """
	Use a grid technique with many SPs.
	"""

    p = 'results\\mnist_filter'
    # try:
    # os.makedirs(p)
    # except OSError:
    # pass
    np.random.seed(123456789)
    # kargs = {
    # 'ninputs': 9,
    # 'ncolumns': 100,
    # 'nsynapses': 5,
    # 'random_permanence': True,
    # 'pinc':0.03, 'pdec':0.05,
    # 'seg_th': 3,
    # 'nactive': 10,
    # 'duty_cycle': 100,
    # 'max_boost': 10,
    # 'global_inhibition': True,
    # 'trim': 1e-4
    # }
    kargs2 = {
        'ninputs': 100 * (26**2),
        'ncolumns': 2048,
        'nsynapses': 1000,
        'random_permanence': True,
        'pinc': 0.03,
        'pdec': 0.05,
        'seg_th': 5,
        'nactive': 20,
        'duty_cycle': 100,
        'max_boost': 10,
        'global_inhibition': True,
        'trim': 1e-4
    }

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = get_data()
    nwindows = 26**2

    # # Make the SPs
    # sps = [SPRegion(**kargs) for _ in xrange(nwindows)]

    # # Train the SPs
    # nepochs = 10
    # t = time.time()
    # for i in xrange(nepochs):
    # print i
    # for j, x in enumerate(tr_x):
    # print '\t{0}'.format(j)
    # nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape(
    # nwindows, 9)
    # for xi, sp in izip(nx, sps):
    # sp.step(xi)
    # t1 = time.time() - t
    # print t1

    # # Save this batch of SPs
    # for i, sp in enumerate(sps):
    # sp.learn = False
    # sp.save(os.path.join(p, 'sp0-{0}.pkl'.format(i)))

    # Make the top level SP
    sp2 = SPRegion(**kargs2)

    # Get the SPs
    sps = [load(os.path.join(p, sp)) for sp in os.listdir(p) if sp[2] == '0']

    # Train the top SP
    nepochs = 10
    t = time.time()
    for i in xrange(nepochs):
        print i
        for j, x in enumerate(tr_x):
            print '\t{0}'.format(j)
            nx = extract_patches_2d(x.reshape(28, 28),
                                    (3, 3)).reshape(nwindows, 9)
            output = np.array(np.zeros(100 * nwindows), dtype='bool')
            for k, (xi, sp) in enumerate(izip(nx, sps)):
                sp.step(xi)
                output[k * 100:(k * 100) + 100] = sp.y[:, 0]
            sp2.step(output)
    t2 = time.time() - t
    print t2

    # Save the top SP
    sp2.learn = False
    sp2.save(os.path.join(p, 'sp1-0.pkl'))
Example #16
def base_experiment(config, pct_noise=0.15, noverlap_bits=0, ntrials=10,
	verbose=False, seed=123456789):
	"""
	Run a single experiment, locally.
	
	@param config: The configuration parameters.
	
	@param pct_noise: The percentage of noise to add to the dataset.
	
	@param noverlap_bits: The number of bits the base class should overlap
	with the novelty class.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param verbose: If True print the results.
	
	@param seed: The random seed to use.
	"""
	
	# Base parameters
	ntrain, ntest = 800, 200
	nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
	clf_th = 0.5
	
	# Build the directory, if needed
	base_dir = config['log_dir']
	if not os.path.exists(base_dir): os.makedirs(base_dir)
	
	# Seed numpy
	np.random.seed(seed)
	
	# Create the base dataset
	x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
	x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]
	
	# Create the outlier dataset
	base_indexes = set(np.where(x_ds.base_class == 1)[0])
	choices = [x for x in xrange(nbits) if x not in base_indexes]
	outlier_base = np.zeros(nbits, dtype='bool')
	outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
		False)] = 1
	outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
	y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
	y_te = y_ds.data
	
	if verbose:
		print "\nBase class' test noise: {0:2.2f}".format(1 - (np.mean(x_te, 0)
			* x_ds.base_class.astype('i')).sum() / 40.)
		print "Outlier's class noise: {0:2.2f}".format(1 - (np.mean(y_te, 0) *
			outlier_base.astype('i')).sum() / 40.)
		print 'Overlap between two classes: {0}'.format(np.dot(
			x_ds.base_class.astype('i'), outlier_base.astype('i')))
	
	# Metrics
	metrics = SPMetrics()
	
	# Get the metrics for the datasets
	u_x_tr = metrics.compute_uniqueness(x_tr)
	o_x_tr = metrics.compute_overlap(x_tr)
	u_x_te = metrics.compute_uniqueness(x_te)
	o_x_te = metrics.compute_overlap(x_te)
	u_y_te = metrics.compute_uniqueness(y_te)
	o_y_te = metrics.compute_overlap(y_te)
	
	# Initialize the overall results
	sp_x_results = np.zeros(ntrials)
	sp_y_results = np.zeros(ntrials)
	svm_x_results = np.zeros(ntrials)
	svm_y_results = np.zeros(ntrials)
	
	# Iterate across the trials:
	for i, seed2 in enumerate(generate_seeds(ntrials, seed)):
		# Create the SP
		config['seed'] = seed2
		sp = SPRegion(**config)
		
		# Fit the SP
		sp.fit(x_tr)
		
		# Get the SP's output
		sp_x_tr = sp.predict(x_tr)
		sp_x_te = sp.predict(x_te)
		sp_y_te = sp.predict(y_te)
		
		# Get the metrics for the SP's results
		u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
		o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
		u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
		o_sp_x_te = metrics.compute_overlap(sp_x_te)
		u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
		o_sp_y_te = metrics.compute_overlap(sp_y_te)
		
		# Log all of the metrics
		sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
		sp._log_stats('Input Base Class Train Overlap', o_x_tr)
		sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
		sp._log_stats('Input Base Class Test Overlap', o_x_te)
		sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
		sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
		sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
		sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
		sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
		sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
		sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
		sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)
		
		# Print the results
		fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'
		if verbose:
			print '\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te'
			print fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr,
				u_sp_x_te, u_sp_y_te)
			print fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr,
				o_sp_x_te, o_sp_y_te)
		
		# Get average representation of the base class
		sp_base_result = np.mean(sp_x_tr, 0)
		sp_base_result[sp_base_result >= 0.5] = 1
		sp_base_result[sp_base_result < 1] = 0
		
		# Averaged results for each metric type
		u_sp_base_to_x_te = 0.
		o_sp_base_to_x_te = 0.
		u_sp_base_to_y_te = 0.
		o_sp_base_to_y_te = 0.
		for x, y in zip(sp_x_te, sp_y_te):
			# Refactor
			xt = np.vstack((sp_base_result, x))
			yt = np.vstack((sp_base_result, y))
			
			# Compute the sums
			u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
			o_sp_base_to_x_te += metrics.compute_overlap(xt)
			u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
			o_sp_base_to_y_te += metrics.compute_overlap(yt)
		u_sp_base_to_x_te /= ntest
		o_sp_base_to_x_te /= ntest
		u_sp_base_to_y_te /= ntest
		o_sp_base_to_y_te /= ntest
		
		# Log the results
		sp._log_stats('Base Train to Base Test Uniqueness',
			u_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
		sp._log_stats('Base Train to Novelty Test Uniqueness',
			u_sp_base_to_y_te)
		sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)
		
		# Print the results
		if verbose:
			print '\nDescription\tx_tr->x_te\tx_tr->y_te'
			print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(u_sp_base_to_x_te,
				u_sp_base_to_y_te)
			print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format(o_sp_base_to_x_te,
				o_sp_base_to_y_te)
		
		# Create an SVM
		clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)
		
		# Evaluate the SVM's performance
		clf.fit(x_tr)
		svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \
			100
		svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * \
			100
		
		# Perform classification using overlap as the feature
		# -- The overlap must be above 50%
		clf_x_te = 0.
		clf_y_te = 0.
		for x, y in zip(sp_x_te, sp_y_te):
			# Refactor
			xt = np.vstack((sp_base_result, x))
			yt = np.vstack((sp_base_result, y))
			
			# Compute the accuracy
			xo = metrics.compute_overlap(xt)
			yo = metrics.compute_overlap(yt)
			if xo >= clf_th: clf_x_te += 1
			if yo < clf_th: clf_y_te += 1
		clf_x_te = (clf_x_te / ntest) * 100
		clf_y_te = (clf_y_te / ntest) * 100
		
		# Store the results as errors
		sp_x_results[i] = 100 - clf_x_te
		sp_y_results[i] = 100 - clf_y_te
		svm_x_results[i] = 100 - svm_x_te
		svm_y_results[i] = 100 - svm_y_te
		
		# Log the results
		sp._log_stats('SP % Correct Base Class', clf_x_te)
		sp._log_stats('SP % Correct Novelty Class', clf_y_te)
		sp._log_stats('SVM % Correct Base Class', svm_x_te)
		sp._log_stats('SVM % Correct Novelty Class', svm_y_te)
		
		# Print the results
		if verbose:
			print '\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te)
			print 'SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te)
			print 'SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te)
			print 'SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te)
	
	# Save the results
	with open(os.path.join(base_dir, 'results.pkl'), 'wb') as f:
		cPickle.dump((sp_x_results, sp_y_results, svm_x_results,
			svm_y_results), f, cPickle.HIGHEST_PROTOCOL)
Example #17
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
	# Set the configuration parameters for the SP
	ninputs = 784
	kargs = {
		'ninputs': ninputs,
		'ncolumns': ninputs,
		'nactive': 20,
		'global_inhibition': True,
		'trim': False,
		'seed': seed,
		
		'max_boost': 3,
		'duty_cycle': 8,
		
		'nsynapses': 392,
		'seg_th': 2,
		
		'syn_th': 0.5,
		'pinc': 0.01,
		'pdec': 0.02,
		'pwindow': 0.5,
		'random_permanence': True,
		
		'nepochs': 1,
		'clf': LinearSVC(random_state=seed),
		'log_dir': os.path.join('simple_mnist', '1-1')
	}
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Split the data for CV
	cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)
	
	# Execute the SP on each fold. Additionally, get results for each fitting
	# method.
	for i, (tr, te) in enumerate(cv):
		# Create the region
		sp = SPRegion(**kargs)
		
		# Train the region
		sp.fit(x[tr], y[tr])
		
		# Test the base classifier
		clf = LinearSVC(random_state=seed)
		clf.fit(x[tr], y[tr])
		score = clf.score(x[te], y[te])
		print 'SVM Only Accuracy: {0:.2f}%'.format(score * 100)
		
		# Test the region for the column method
		score = sp.score(x[te], y[te])
		print 'Column Accuracy: {0:.2f}%'.format(score * 100)
		
		# Test the region for the probabilistic method
		score = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
		print 'Probabilistic Accuracy: {0:.2f}%'.format(score * 100)
		
		# Test the region for the dimensionality reduction method
		score = sp.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
		ndims = len(sp.reduce_dimensions(x[0]))
		print 'Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
			ninputs, ndims, ninputs / float(ndims))
		print 'Reduction Accuracy: {0:.2f}%'.format(score * 100)
	
	# Get a random set of unique inputs from the training set
	inputs = np.zeros((10, ninputs))
	for i in xrange(10):
		ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
		inputs[i] = x[tr][ix]
	
	# Get the SP's predictions for the inputs
	sp_pred = sp.predict(inputs)
	
	# Get the reconstruction in the context of the SP
	sp_inputs = sp.reconstruct_input(sp_pred)
	
	# Make a plot comparing the two
	x1_labels = [str(i) for i in xrange(10)]
	x2_labels = [str(i) for i in xrange(10)]
	title = 'Input Reconstruction: Original (top), SP (bottom)'
	shape = (28, 28)
	path = os.path.join(sp.log_dir, 'input_reconstruction.png')
	plot_compare_images((inputs, sp_inputs), shape, title, (x1_labels,
		x2_labels,), path)
Example #18
def sp_one_level(base_dir, data_path='data.csv', seed=123456789):
	"""
	Test the SP.
	"""
	
	# Make a new directory
	new_dir = os.path.join(base_dir, time.strftime('%Y%m%d-%H%M%S',
		time.localtime()))
	os.makedirs(new_dir)
	
	# Params
	nsplits = 8
	pct_train = 0.8
	
	# Get data
	data = pd.read_csv(data_path)
	x = data.ix[:, :-1].as_matrix()
	y = data.ix[:, -1].as_matrix()
	x, y = convert_data_to_int(x, y)
	
	# Create the encoder
	num_bits_per_encoder = 50
	category_encoders = [
		CategoryEncoder(
			num_categories=len(set(xi)), 
			num_bits=num_bits_per_encoder
		) for xi in x.T
	]
	total_bits = num_bits_per_encoder*len(category_encoders)
	encoder = MultiEncoder(
		*category_encoders
	)
	
	# Build the config for the SP
	ncolumns = 4096
	nactive = int(ncolumns * 0.20)
	nsynapses = 25
	seg_th = 0
	sp_config = {
		'ninputs': total_bits,
		'ncolumns': ncolumns,
		'nactive': nactive,
		'global_inhibition': True,
		'trim': 1e-4,
		'disable_boost': True,
		'seed': seed,
		
		'nsynapses': nsynapses,
		'seg_th': seg_th,
		
		'syn_th': 0.5,
		'pinc': 0.001,
		'pdec': 0.001,
		'pwindow': 0.5,
		'random_permanence': True,
		
		'nepochs': 1,
		'log_dir': os.path.join(new_dir, '1-1'),
		'clf': LinearSVC(random_state=seed)
	}
	
	# Encode all of the data
	new_x = np.zeros((len(x), total_bits), dtype='bool')
	for i in xrange(len(x)):
		encoder.bind_data([(x[i,j], j) for j in xrange(x.shape[1])])
		new_x[i] = np.array(list(encoder.encode()), dtype='bool')
	
	# Dump the data and the details
	with open(os.path.join(new_dir, 'input.pkl'), 'wb') as f:
		cPickle.dump((new_x, y), f, cPickle.HIGHEST_PROTOCOL)
	with open(os.path.join(new_dir, 'details.csv'), 'wb') as f:
		writer = csv.writer(f)
		category_encoder_details = [[
			'Category {0}: Num bits: {1}'.format(i, c.num_bits),
			'Category {0}: Active bits: {1}'.format(i, c.active_bits),
			'Category {0}: Num categories: {1}'.format(i, c.num_categories)]
			for i, c in enumerate(category_encoders)]
		writer.writerows(category_encoder_details)
		writer.writerow(['Num splits', nsplits])
		writer.writerow(['% train', pct_train])
		writer.writerow(['Seed', seed])
	
	# Run the experiment
	sss = StratifiedShuffleSplit(y, n_iter=nsplits, train_size=pct_train,
		random_state=seed)
	results = Parallel(n_jobs=-1)(delayed(train_score_clf)(
		SPRegion(**sp_config), new_x[tr], new_x[te], y[tr], y[te])
		for i, (tr, te) in enumerate(sss))
	pct_accuracy = np.median(results)
	print ['{0:.3f}'.format(r) for r in results]
	print 'SP + Linear SVM: {0:.3f} %'.format(pct_accuracy)
	with open(os.path.join(new_dir, 'details.csv'), 'ab') as f:
		writer = csv.writer(f)
		writer.writerow(['% Accuracy', pct_accuracy])
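
train_score_clf is likewise not shown. A minimal sketch consistent with how it is called above (fit on the training split, return an accuracy percentage for the median that follows), offered as an assumption:

def train_score_clf(clf, tr_x, te_x, tr_y, te_y):
	"""
	Hypothetical helper: fit the region on the training split and return
	its test accuracy as a percentage.
	"""
	clf.fit(tr_x, tr_y)
	return clf.score(te_x, te_y) * 100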
Example #19
def run_single_experiment(base_dir, ntrials=10, seed=123456789):
	"""
	Run the actual experiment.
	
	@param base_dir: The directory to containing the experiment to be run.
	
	@param ntrials: The number of trials to perform with different seeds.
	
	@param seed: The initial seed used to generate the other random seeds.
	"""
	
	# Generate the number of requested seeds
	seeds = generate_seeds(ntrials, seed)
	
	# Get the configuration
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		config = json.load(f)
	
	# Get the data and base metric data
	with open(os.path.join(base_dir, 'dataset.pkl'), 'rb') as f:
		data = cPickle.load(f)
		uniqueness_data, overlap_data, correlation_data = cPickle.load(f)
	
	# Metrics
	metrics = SPMetrics()
	
	# Execute each run
	for s in seeds:
		# Update the seed
		config['seed'] = s
		
		# Create the SP
		sp = SPRegion(**config)
		
		# Fit the SP
		sp.fit(data)
		
		# Get the SP's output
		sp_output = sp.predict(data)
		
		# Log all of the metrics
		sp._log_stats('Input Uniqueness', uniqueness_data)
		sp._log_stats('Input Overlap', overlap_data)
		sp._log_stats('Input Correlation', correlation_data)
		sp._log_stats('SP Uniqueness', metrics.compute_uniqueness(sp_output))
		sp._log_stats('SP Overlap', metrics.compute_overlap(sp_output))
		sp._log_stats('SP Correlation', 1 - metrics.compute_distance(
			sp_output))
Example #20
def main():
    """
	Program entry.
	
	Build an SP using SPDataset and see how it performs.
	"""

    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    seed = 123456789
    base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(base_path, '1-1')
    }

    # Build items to store results
    npoints = 25
    pct_noises = np.linspace(0, 1, npoints, False)
    uniqueness_sp, uniqueness_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp, similarity_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp1, similarity_data1 = np.zeros(npoints), np.zeros(npoints)
    similarity_sp0, similarity_data0 = np.zeros(npoints), np.zeros(npoints)
    dissimilarity_sp, dissimilarity_data = np.zeros(npoints), np.zeros(npoints)
    overlap_sp, overlap_data = np.zeros(npoints), np.zeros(npoints)
    correlation_sp, correlation_data = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        print 'Iteration {0} of {1}'.format(i + 1, npoints)

        # Build the dataset
        ds = SPDataset(nsamples=nsamples,
                       nbits=nbits,
                       pct_active=pct_active,
                       pct_noise=pct_noise,
                       seed=seed)

        # Get the dataset stats
        uniqueness_data[i] = metrics.compute_uniqueness(ds.data)
        similarity_data[i] = metrics.compute_total_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data1[i] = metrics.compute_one_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data0[i] = metrics.compute_zero_similarity(
            ds.data, confidence_interval=0.9)
        dissimilarity_data[i] = metrics.compute_dissimilarity(
            ds.data, confidence_interval=0.9)
        overlap_data[i] = metrics.compute_overlap(ds.data)
        correlation_data[i] = 1 - metrics.compute_distance(ds.data)

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(ds.data)

        # Get the SP's output SDRs
        sp_output = sp.predict(ds.data)

        # Get the stats
        uniqueness_sp[i] = metrics.compute_uniqueness(sp_output)
        similarity_sp[i] = metrics.compute_total_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp1[i] = metrics.compute_one_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp0[i] = metrics.compute_zero_similarity(
            sp_output, confidence_interval=0.9)
        dissimilarity_sp[i] = metrics.compute_dissimilarity(
            sp_output, confidence_interval=0.9)
        overlap_sp[i] = metrics.compute_overlap(sp_output)
        correlation_sp[i] = 1 - metrics.compute_distance(sp_output)

    # Make some plots
    print 'Showing uniqueness - 0% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [uniqueness_data * 100, uniqueness_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label='Uniqueness [%]',
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'uniqueness.png'),
              show=True)
    print 'Showing total similarity - 100% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data * 100, similarity_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label='Total similarity [%]',
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'similarity.png'),
              show=True)
    print 'Showing similarity of "1" bits - 100% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data1 * 100, similarity_sp1 * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="Similarity of '1's [%]",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'one_similarity.png'),
              show=True)
    print 'Showing similarity of "0" bits - 100% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data0 * 100, similarity_sp0 * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="Similarity of '0's [%]",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'zero_similarity.png'),
              show=True)
    print 'Showing dissimilarity - 0% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [dissimilarity_data * 100, dissimilarity_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="Dissimilarity [%]",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'dissimilarity.png'),
              show=True)
    print 'Showing average normalized overlap - 100% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [overlap_data * 100, overlap_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="% Normalized Overlap",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'overlap.png'),
              show=True)
    print 'Showing % average sample correlation coefficient - 100% is ideal'
    plot_line([pct_noises * 100, pct_noises * 100],
              [correlation_data * 100, correlation_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="% Correlation",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'correlation.png'),
              show=True)

    print '*** All data saved in "{0}" ***'.format(base_path)
Example #21
def local_experiment():
	"""
	Run a single experiment, locally.
	"""
	
	seed = 123456789
	config = {
		'ninputs': 100,
		'trim': 1e-4,
		'disable_boost': True,
		'seed': seed,
		'pct_active': None,
		'random_permanence': True,
		'pwindow': 0.5,
		
		'global_inhibition': True,
		
		'ncolumns': 200,
		'nactive': 50,
		
		
		'nsynapses': 100,
		'seg_th': 5,
		
		'syn_th': 0.5,
		
		'pinc': 0.001,
		'pdec': 0.001,
		
		'nepochs': 10,
		
		'log_dir': os.path.join(os.path.expanduser('~'), 'scratch',
			'param_experiments', '1-1')
	}
	
	# Get the data
	nsamples, nbits, pct_active, pct_noise = 500, 100, 0.4, 0.15
	ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed)
	data = ds.data
	
	# Metrics
	metrics = SPMetrics()
	
	# Get the metrics for the dataset
	uniqueness_data = metrics.compute_uniqueness(data)
	overlap_data = metrics.compute_overlap(data)
	correlation_data = 1 - metrics.compute_distance(data)
	
	# Create the SP
	sp = SPRegion(**config)
	
	# Fit the SP
	sp.fit(data)
	
	# Get the SP's output
	sp_output = sp.predict(data)
	
	# Get the metrics for the SP's results
	sp_uniqueness = metrics.compute_uniqueness(sp_output)
	sp_overlap = metrics.compute_overlap(sp_output)
	sp_correlation = 1 - metrics.compute_distance(sp_output)
	
	# Log all of the metrics
	sp._log_stats('Input Uniqueness', uniqueness_data)
	sp._log_stats('Input Overlap', overlap_data)
	sp._log_stats('Input Correlation', correlation_data)
	sp._log_stats('SP Uniqueness', sp_uniqueness)
	sp._log_stats('SP Overlap', sp_overlap)
	sp._log_stats('SP Correlation', sp_correlation)

	print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(uniqueness_data,
		sp_uniqueness)
	print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format(overlap_data, sp_overlap)
	print 'Correlation:\t{0:2.4f}\t{1:2.4f}'.format(correlation_data,
		sp_correlation)
	
	# Get a new random input
	ds2 = SPDataset(nsamples, nbits, pct_active, pct_noise, 123)
	print '\n% Overlapping old class to new: \t{0:2.4f}%'.format(
		(float(np.dot(ds.input, ds2.input)) / nbits) * 100)
	
	# Test the SP on the new dataset
	sp_output2 = sp.predict(ds2.data)
	
	# Get average representation of first result
	original_result = np.mean(sp_output, 0)
	original_result[original_result >= 0.5] = 1
	original_result[original_result < 1] = 0
	
	# Get averaged results for each metric type
	sp_uniqueness2 = 0.
	sp_overlap2 = 0.
	sp_correlation2 = 0.
	for item in sp_output2:
		test = np.vstack((original_result, item))
		sp_uniqueness2 += metrics.compute_uniqueness(test)
		sp_overlap2 += metrics.compute_overlap(test)
		sp_correlation2 += 1 - metrics.compute_distance(test)
	sp_uniqueness2 /= len(sp_output2)
	sp_overlap2 /= len(sp_output2)
	sp_correlation2 /= len(sp_output2)
	print sp_uniqueness2, sp_overlap2, sp_correlation2
Example #22
def local_experiment():
    """Run a single experiment, locally."""
    seed = 123456789
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 100,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': os.path.join(os.path.expanduser('~'), 'scratch',
                                'param_experiments', '1-1')
    }

    # Get the data
    nsamples, nbits, pct_active, pct_noise = 500, 100, 0.4, 0.15
    ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed)
    data = ds.data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Create the SP
    sp = SPRegion(**config)

    # Fit the SP
    sp.fit(data)

    # Get the SP's output
    sp_output = sp.predict(data)

    # Get the metrics for the SP's results
    sp_uniqueness = metrics.compute_uniqueness(sp_output)
    sp_overlap = metrics.compute_overlap(sp_output)
    sp_correlation = 1 - metrics.compute_distance(sp_output)

    # Log all of the metrics
    sp._log_stats('Input Uniqueness', uniqueness_data)
    sp._log_stats('Input Overlap', overlap_data)
    sp._log_stats('Input Correlation', correlation_data)
    sp._log_stats('SP Uniqueness', sp_uniqueness)
    sp._log_stats('SP Overlap', sp_overlap)
    sp._log_stats('SP Correlation', sp_correlation)

    print(f'Uniqueness:\t{uniqueness_data:2.4f}\t{sp_uniqueness:2.4f}')
    print(f'Overlap:\t{overlap_data:2.4f}\t{sp_overlap:2.4f}')
    print(f'Correlation:\t{correlation_data:2.4f}\t{sp_correlation:2.4f}')

    # Get a new random input
    ds2 = SPDataset(nsamples, nbits, pct_active, pct_noise, 123)
    pct_overlap = (float(np.dot(ds.input, ds2.input)) / nbits) * 100
    print(f'\n% Overlapping old class to new: \t{pct_overlap:2.4f}%')

    # Test the SP on the new dataset
    sp_output2 = sp.predict(ds2.data)

    # Get average representation of first result
    original_result = np.mean(sp_output, 0)
    original_result[original_result >= 0.5] = 1
    original_result[original_result < 1] = 0

    # Get averaged results for each metric type
    sp_uniqueness2 = 0.
    sp_overlap2 = 0.
    sp_correlation2 = 0.
    for item in sp_output2:
        test = np.vstack((original_result, item))
        sp_uniqueness2 += metrics.compute_uniqueness(test)
        sp_overlap2 += metrics.compute_overlap(test)
        sp_correlation2 += 1 - metrics.compute_distance(test)
    sp_uniqueness2 /= len(sp_output2)
    sp_overlap2 /= len(sp_output2)
    sp_correlation2 /= len(sp_output2)
    print(sp_uniqueness2, sp_overlap2, sp_correlation2)
Exemple #23
0
def base_experiment(config, ntrials=1, seed=123456789):
	"""
	Run a single experiment, locally.
		
	@param config: The configuration parameters to use for the SP.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param seed: The random seed to use.
	
	@return: A tuple containing the percentage errors for the SP's training
	and testing results and the SVM's training and testing results,
	respectively.
	"""
	
	# Base parameters
	ntrain, ntest = 800, 200
	clf_th = 0.5
	
	# Seed numpy
	np.random.seed(seed)
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	tr_x_0 = np.random.permutation(tr_x[tr_y == 0])
	x_tr = tr_x_0[:ntrain]
	x_te = tr_x_0[ntrain:ntrain + ntest]
	outliers = [np.random.permutation(tr_x[tr_y == i])[:ntest] for i in
		xrange(1, 10)]
	
	# Metrics
	metrics = SPMetrics()
	
	# Get the metrics for the datasets
	u_x_tr = metrics.compute_uniqueness(x_tr)
	o_x_tr = metrics.compute_overlap(x_tr)
	c_x_tr = 1 - metrics.compute_distance(x_tr)
	u_x_te = metrics.compute_uniqueness(x_te)
	o_x_te = metrics.compute_overlap(x_te)
	c_x_te = 1 - metrics.compute_distance(x_te)
	u_y_te, o_y_te, c_y_te = [], [], []
	for outlier in outliers:
		u_y_te.append(metrics.compute_uniqueness(outlier))
		o_y_te.append(metrics.compute_overlap(outlier))
		c_y_te.append(1 - metrics.compute_distance(outlier))
	
	# Initialize the overall results
	sp_x_results = np.zeros(ntrials)
	sp_y_results = [np.zeros(ntrials) for _ in xrange(9)]
	svm_x_results = np.zeros(ntrials)
	svm_y_results = [np.zeros(ntrials) for _ in xrange(9)]
	
	# Iterate across the trials:
	for nt in xrange(ntrials):
		# Make a new seed
		seed2 = np.random.randint(1000000)
		config['seed'] = seed2
		
		# Create the SP
		sp = SPRegion(**config)
		
		# Fit the SP
		sp.fit(x_tr)
		
		# Get the SP's output
		sp_x_tr = sp.predict(x_tr)
		sp_x_te = sp.predict(x_te)
		sp_y_te = [sp.predict(outlier) for outlier in outliers]
		
		# Get the metrics for the SP's results
		u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
		o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
		c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
		u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
		o_sp_x_te = metrics.compute_overlap(sp_x_te)
		c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
		u_sp_y_te, o_sp_y_te, c_sp_y_te = [], [], []
		for y in sp_y_te:
			u_sp_y_te.append(metrics.compute_uniqueness(y))
			o_sp_y_te.append(metrics.compute_overlap(y))
			c_sp_y_te.append(1 - metrics.compute_distance(y))
		
		# Log all of the metrics
		sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
		sp._log_stats('Input Base Class Train Overlap', o_x_tr)
		sp._log_stats('Input Base Class Train Correlation', c_x_tr)
		sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
		sp._log_stats('Input Base Class Test Overlap', o_x_te)
		sp._log_stats('Input Base Class Test Correlation', c_x_te)
		sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
		sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
		sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
		sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
		sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
		sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
		for i, (a, b, c, d, e, f) in enumerate(zip(u_y_te, o_y_te, c_y_te,
			u_sp_y_te, o_sp_y_te, c_sp_y_te), 1):
			sp._log_stats('Input Novelty Class {0} Uniqueness'.format(i), a)
			sp._log_stats('Input Novelty Class {0} Overlap'.format(i), b)
			sp._log_stats('Input Novelty Class {0} Correlation'.format(i), c)	
			sp._log_stats('SP Novelty Class {0} Uniqueness'.format(i), d)
			sp._log_stats('SP Novelty Class {0} Overlap'.format(i), e)
			sp._log_stats('SP Novelty Class {0} Correlation'.format(i), f)
		
		# Get average representation of the base class
		sp_base_result = np.mean(sp_x_tr, 0)
		sp_base_result[sp_base_result >= 0.5] = 1
		sp_base_result[sp_base_result < 1] = 0
		
		# Averaged results for each metric type
		u_sp_base_to_x_te = 0.
		o_sp_base_to_x_te = 0.
		c_sp_base_to_x_te = 0.
		u_sp, o_sp, c_sp = np.zeros(9), np.zeros(9), np.zeros(9)
		for i, x in enumerate(sp_x_te):
			xt = np.vstack((sp_base_result, x))
			u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
			o_sp_base_to_x_te += metrics.compute_overlap(xt)
			c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
			
			for j, yi in enumerate(sp_y_te):
				yt = np.vstack((sp_base_result, yi[i]))
				u_sp[j] += metrics.compute_uniqueness(yt)
				o_sp[j] += metrics.compute_overlap(yt)
				c_sp[j] += 1 - metrics.compute_distance(yt)
		u_sp_base_to_x_te /= ntest
		o_sp_base_to_x_te /= ntest
		c_sp_base_to_x_te /= ntest
		for i in xrange(9):
			u_sp[i] /= ntest
			o_sp[i] /= ntest
			c_sp[i] /= ntest
		
		# Log the results
		sp._log_stats('Base Train to Base Test Uniqueness',
			u_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Correlation', c_sp_base_to_x_te)
		for i, j in enumerate(xrange(1, 10)):
			sp._log_stats('Base Train to Novelty {0} Uniqueness'.format(j),
				u_sp[i])
			sp._log_stats('Base Train to Novelty {0} Overlap'.format(j),
				o_sp[i])
			sp._log_stats('Base Train to Novelty {0} Correlation'.format(j),
				c_sp[i])
		
		# Create an SVM
		clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)
		
		# Evaluate the SVM's performance
		clf.fit(x_tr)
		svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \
			100
		svm_y_te = np.array([len(np.where(clf.predict(outlier) == -1)[0]) /
			float(ntest) * 100 for outlier in outliers])
		
		# Perform classification using overlap as the feature
		# -- The overlap must be above 50%
		clf_x_te = 0.
		clf_y_te = np.zeros(9)
		for i, x in enumerate(sp_x_te):
			xt = np.vstack((sp_base_result, x))
			xo = metrics.compute_overlap(xt)
			if xo >= clf_th: clf_x_te += 1
			
			for j, yi in enumerate(sp_y_te):
				yt = np.vstack((sp_base_result, yi[i]))
				yo = metrics.compute_overlap(yt)
				if yo < clf_th: clf_y_te[j] += 1
		clf_x_te = (clf_x_te / ntest) * 100
		clf_y_te = (clf_y_te / ntest) * 100
		
		# Store the results as errors
		sp_x_results[nt] = 100 - clf_x_te
		svm_x_results[nt] = 100 - svm_x_te
		for j in xrange(9):
			sp_y_results[j][nt] = 100 - clf_y_te[j]
			svm_y_results[j][nt] = 100 - svm_y_te[j]
		
		# Log the results
		sp._log_stats('SP % Correct Base Class', clf_x_te)
		sp._log_stats('SVM % Correct Base Class', svm_x_te)
		for i, j in enumerate(xrange(1, 10)):
			sp._log_stats('SP % Correct Novelty Class {0}'.format(j),
				clf_y_te[i])
			sp._log_stats('SVM % Correct Novelty Class {0}'.format(j),
				svm_y_te[i])
		sp._log_stats('SP % Mean Correct Novelty Class', np.mean(clf_y_te))
		sp._log_stats('SVM % Mean Correct Novelty Class', np.mean(svm_y_te))
		sp._log_stats('SP % Adjusted Score', (np.mean(clf_y_te) * clf_x_te) /
			100)
		sp._log_stats('SVM % Adjusted Score', (np.mean(svm_y_te) * svm_x_te) /
			100)
	
	return sp_x_results, sp_y_results, svm_x_results, svm_y_results
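
# Hypothetical driver sketch for the experiment above: run a few trials and
# report the mean error per class. The SP configuration is borrowed from the
# simple MNIST example further below; the trial count and log directory are
# illustrative only.
import os

config = {
	'ninputs': 784,
	'ncolumns': 784,
	'nactive': 30,
	'global_inhibition': True,
	'trim': False,
	'disable_boost': True,
	'nsynapses': 392,
	'seg_th': 10,
	'syn_th': 0.5,
	'pinc': 0.001,
	'pdec': 0.002,
	'pwindow': 0.01,
	'random_permanence': True,
	'nepochs': 10,
	'log_dir': os.path.join('novelty_mnist', '1-1')
}
sp_x, sp_y, svm_x, svm_y = base_experiment(config, ntrials=5)
print 'SP base class error : {0:2.2f}%'.format(sp_x.mean())
for i, err in enumerate(sp_y, 1):
	print 'SP novelty class {0} error: {1:2.2f}%'.format(i, err.mean())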
Exemple #24
0
def full_mnist(base_dir, new_dir, auto_update=False):
	"""
	Execute a full MNIST run using the parameters specified in the config.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config.
	
	@param new_dir: The full path of where the data should be saved.
	
	@param auto_update: If True the permanence increment and decrement amounts
	will automatically be computed by the runner. If False, the ones specified
	in the config file will be used.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		kargs = json.load(f)
	kargs['log_dir'] = new_dir
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()

	# Automatically compute the permanence update amounts
	if auto_update:
		# Compute average sum of each training instance
		avg_s = tr_x.sum(1)
		
		# Compute the total average sum
		avg_ts = avg_s.mean()
		
		# Compute the average active probability
		a_p = avg_ts / float(tr_x.shape[1])
		
		# Compute the scaling factor
		scaling_factor = 1 / avg_ts
		
		# Compute the update amounts
		pinc = scaling_factor * (1 / a_p)
		pdec = scaling_factor * (1 / (1 - a_p))
		
		# Update the config
		kargs['pinc'], kargs['pdec'] = pinc, pdec
	
	# Execute
	clf = SPRegion(**kargs)
	clf.fit(tr_x, tr_y)
	
	# Column accuracy
	clf.score(te_x, te_y)
	
	# Probabilistic accuracy
	clf.score(te_x, te_y, tr_x=tr_x, score_method='prob')
	
	# Dimensionality reduction method
	clf.score(te_x, te_y, tr_x=tr_x, score_method='reduction')
	ndims = len(clf.reduce_dimensions(tr_x[0]))
	clf._log_stats('Number of New Dimensions', ndims)
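
# The auto_update branch above normalizes permanence changes by the average
# number of active bits per instance (avg_ts): the increment is weighted by
# how rarely a bit is active (1 / a_p) and the decrement by how rarely it is
# inactive (1 / (1 - a_p)). A standalone sketch of the same computation:
def auto_update_amounts(tr_x):
	"""Compute balanced permanence increment / decrement amounts."""
	avg_ts = tr_x.sum(1).mean()          # Average active bits per instance
	a_p = avg_ts / float(tr_x.shape[1])  # Average active probability
	scaling_factor = 1. / avg_ts
	return scaling_factor / a_p, scaling_factor / (1 - a_p)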
Exemple #25
0
def main(ds,
         p,
         ncols=2048,
         duty_cycle=100,
         nepochs=10,
         global_inhibition=True,
         seed=123456789):
    """Run an experiment.

    @param ds: The dataset.

    @param p: The full path to the directory to save the results.

    @param ncols: The number of columns.

    @param duty_cycle: The duty cycle.

    @param nepochs: The number of epochs

    @param global_inhibition: If True use global inhibition otherwise use local
    inhibition.

    @param seed: The random seed.
    """
    # Get some parameters
    ninputs = ds.shape[1]
    density = np.sum(ds[0]) / float(ninputs)

    # Make the directory if it doesn't exist
    try:
        os.makedirs(p)
    except OSError:
        pass

    # Initializations
    np.random.seed(seed)
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ncols,
        'nsynapses': 40,
        'random_permanence': True,
        'pinc': 0.03,
        'pdec': 0.05,
        'seg_th': 15,
        'nactive': int(0.02 * ncols),
        'duty_cycle': duty_cycle,
        'max_boost': 10,
        'global_inhibition': global_inhibition,
        'trim': 1e-4
    }

    # Create the region
    delattr(SPRegion, '_phase3')
    setattr(SPRegion, '_phase3', _phase3)
    sp = SPRegion(**kargs)
    sp.iter, sp.out_path = 1, p

    # Train the region
    t = time.time()
    for i in range(nepochs):
        for j, x in enumerate(ds):
            sp.execute(x)
            sp.iter += 1
    t = time.time() - t

    # Dump the details
    kargs['density'] = density
    kargs['seed'] = seed
    kargs['nepochs'] = nepochs
    kargs['time'] = t
    with open(os.path.join(p, 'details.json'), 'w') as f:
        f.write(
            json.dumps(kargs, sort_keys=True, indent=4,
                       separators=(',', ': ')))
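
# The `_phase3` override installed above is defined elsewhere in the original
# script. A hypothetical sketch of such an override, assuming `_phase3(self)`
# is the SP's learning phase and that `self.p` holds the permanence matrix
# (as in the permanence dump of the first example):
import os
import pickle

_orig_phase3 = SPRegion._phase3  # Keep a handle before delattr removes it

def _phase3(self):
    # Run the original learning update, then snapshot the permanences.
    _orig_phase3(self)
    path = os.path.join(self.out_path, 'permanence-{0}.pkl'.format(self.iter))
    with open(path, 'wb') as f:
        pickle.dump(self.p, f, pickle.HIGHEST_PROTOCOL)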
Exemple #26
0
def one_cv(base_dir, cv_split):
	"""
	Run the MNIST experiment. Only the specified CV split is executed.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config as well as the pickled data.
	
	@param cv_split: The index for the CV split.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config-{0}.json'.format(cv_split)),
		'rb') as f:
		kargs = json.load(f)
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Get the CV splits
	with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
		cv = cPickle.load(f)
	tr, te = cv[cv_split - 1]
	
	# Remove the split directory, if it exists
	shutil.rmtree(os.path.join(base_dir, str(cv_split)), True)
	
	# Execute
	clf = SPRegion(**kargs)
	clf.fit(x[tr], y[tr])
	
	# Column accuracy
	clf.score(x[te], y[te])
	
	# Probabilistic accuracy
	clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')
	
	# Dimensionality reduction method
	clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
	ndims = len(clf.reduce_dimensions(x[0]))
	clf._log_stats('Number of New Dimensions', ndims)
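
# Hypothetical sketch of how the cv.pkl consumed above could be built,
# reusing the MNISTCV splitter from the simple MNIST example. The path and
# split sizes are illustrative only.
import cPickle
import os

base_dir = os.path.join(os.path.expanduser('~'), 'scratch', 'mnist_cv')
(tr_x, tr_y), (te_x, te_y) = load_mnist()
cv = list(MNISTCV(tr_y, te_y, 800, 200, 10, 123456789))
with open(os.path.join(base_dir, 'cv.pkl'), 'wb') as f:
	cPickle.dump(cv, f, cPickle.HIGHEST_PROTOCOL)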
Exemple #27
0
def base_experiment(config,
                    pct_noise=0.15,
                    noverlap_bits=0,
                    ntrials=10,
                    verbose=False,
                    seed=123456789):
    """
	Run a single experiment, locally.
	
	@param config: The configuration parameters.
	
	@param pct_noise: The percentage of noise to add to the dataset.
	
	@param noverlap_bits: The number of bits the base class should overlap
	with the novelty class.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param verbose: If True print the results.
	
	@param seed: The random seed to use.
	"""

    # Base parameters
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5

    # Build the directory, if needed
    base_dir = config['log_dir']
    if not os.path.exists(base_dir): os.makedirs(base_dir)

    # Seed numpy
    np.random.seed(seed)

    # Create the base dataset
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Create the outlier dataset
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [x for x in xrange(nbits) if x not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)] = 1
    outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        print "\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.)
        print "Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.)
        print 'Overlap between two classes: {0}'.format(
            np.dot(x_ds.base_class.astype('i'), outlier_base.astype('i')))

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Iterate across the trials:
    for i, seed2 in enumerate(generate_seeds(ntrials, seed)):
        # Create the SP
        config['seed'] = seed2
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
        sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
        sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)

        # Print the results
        fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'
        if verbose:
            print '\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te'
            print fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr,
                               u_sp_x_te, u_sp_y_te)
            print fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr,
                               o_sp_x_te, o_sp_y_te)

        # Get average representation of the base class
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the sums
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            o_sp_base_to_y_te += metrics.compute_overlap(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)

        # Print the results
        if verbose:
            print '\nDescription\tx_tr->x_te\tx_tr->y_te'
            print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te)
            print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te)

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / \
            float(ntest) * 100
        svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / \
            float(ntest) * 100

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the accuracy
            xo = metrics.compute_overlap(xt)
            yo = metrics.compute_overlap(yt)
            if xo >= clf_th: clf_x_te += 1
            if yo < clf_th: clf_y_te += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[i] = 100 - clf_x_te
        sp_y_results[i] = 100 - clf_y_te
        svm_x_results[i] = 100 - svm_x_te
        svm_y_results[i] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        # Print the results
        if verbose:
            print '\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te)
            print 'SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te)
            print 'SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te)
            print 'SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te)

    # Save the results
    with open(os.path.join(base_dir, 'results.pkl'), 'wb') as f:
        cPickle.dump(
            (sp_x_results, sp_y_results, svm_x_results, svm_y_results), f,
            cPickle.HIGHEST_PROTOCOL)
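
# generate_seeds is not defined in these examples; a hypothetical
# implementation consistent with its usage above (one reproducible derived
# seed per trial), mirroring the np.random.randint(1000000) pattern used by
# the other experiments:
import numpy as np

def generate_seeds(ntrials, seed):
    """Yield one derived random seed per trial."""
    rng = np.random.RandomState(seed)
    for _ in range(ntrials):
        yield rng.randint(1000000)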
Exemple #28
0
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """Run a simple MNIST classification task."""
    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 30,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'disable_boost': True,
        'nsynapses': 392,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,
        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for i, (tr, te) in enumerate(cv):
        # Create the region
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])
        score = clf.score(x[te], y[te])
        print('SVM Only Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the column method
        score = sp.score(x[te], y[te])
        print('Column Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the probabilistic method
        score = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
        print('Probabilistic Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the dimensionality reduction method
        score = sp.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
        ndims = len(sp.reduce_dimensions(x[0]))
        print('Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
            ninputs, ndims, ninputs / float(ndims)))
        print('Reduction Accuracy: {0:.2f}%'.format(score * 100))

    # Get a random set of unique inputs from the training set
    inputs = np.zeros((10, ninputs))
    for i in range(10):
        ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
        inputs[i] = x[tr][ix]

    # Get the SP's predictions for the inputs
    sp_pred = sp.predict(inputs)

    # Get the reconstruction in the context of the SP
    sp_inputs = sp.reconstruct_input(sp_pred)

    # Make a plot comparing the images
    title = 'Input Reconstruction: Original (top), SP SDRs (middle), ' \
        'SP Reconstruction (bottom)'
    shape = (28, 28)
    path = os.path.join(sp.log_dir, 'input_reconstruction.png')
    plot_compare_images((inputs, sp_pred, sp_inputs),
                        shape,
                        title,
                        out_path=path)
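
# load_mnist is provided by the accompanying library; a rough stand-in
# sketch using scikit-learn, under the assumption that the SP expects binary
# inputs and the usual 60000 / 10000 MNIST train / test split:
def load_mnist_shim(threshold=127):
    """Load MNIST and binarize the pixel intensities."""
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    x = (mnist.data > threshold).astype('bool')
    y = mnist.target.astype('i')
    return (x[:60000], y[:60000]), (x[60000:], y[60000:])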
Exemple #29
0
def main(ntrain=800, ntest=200, nsplits=1, seed=1234567):
	# Set the configuration parameters for the SP
	ninputs = 784
	kargs = {
		'ninputs': ninputs,
		'ncolumns': ninputs,
		'nactive': 10,
		'global_inhibition': True,
		'trim': False,
		'seed': seed,
		
		'disable_boost': True,
		
		'nsynapses': 392,
		'seg_th': 10,
		
		'syn_th': 0.5,
		'pinc': 0.001,
		'pdec': 0.002,
		'pwindow': 0.01,
		'random_permanence': True,
		
		'nepochs': 10,
		'clf': LinearSVC(random_state=seed),
		'log_dir': os.path.join('simple_mnist', '1-1')
	}
	
	# Seed numpy
	np.random.seed(seed)
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Split the data for CV
	cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)
	
	# Execute the SP on each fold. Additionally, get results for each fitting
	# method.
	for i, (tr, te) in enumerate(cv):
		# Create the region
		sp = SPRegion(**kargs)
		
		# Train the region
		sp.fit(x[tr], y[tr])
		
		# Test the base classifier
		clf = LinearSVC(random_state=seed)
		clf.fit(x[tr], y[tr])
	
	# Get a random set of unique inputs from the training set
	inputs = np.zeros((10, ninputs))
	for i in xrange(10):
		ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
		inputs[i] = x[tr][ix]
	
	# Get the SP's predictions for the inputs
	sp_pred = sp.predict(inputs)
	
	# Get the reconstruction in the context of the SP
	sp_inputs = sp.reconstruct_input(sp_pred)
	
	# Make a plot comparing the images
	shape = (28, 28)
	path = os.path.join(sp.log_dir, 'input_reconstruction.png')
	plot_compare_images((inputs, sp_pred, sp_inputs), shape, out_path=path)
Exemple #30
0
def full_cv(base_dir):
	"""
	Run the MNIST experiment. Each CV split is executed sequentially.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config as well as the pickled data.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		kargs = json.load(f)
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Get the CV splits
	with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
		cv = cPickle.load(f)
	
	# Execute each run
	for tr, te in cv:
		clf = SPRegion(**kargs)
		clf.fit(x[tr], y[tr])
		
		# Column accuracy
		clf.score(x[te], y[te])
		
		# Probabilistic accuracy
		clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')
		
		# Dimensionality reduction method
		clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
		ndims = len(clf.reduce_dimensions(x[0]))
		clf._log_stats('Number of New Dimensions', ndims)
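
# Hypothetical sketch of building the config.json that full_cv expects,
# reusing the MNIST parameters from the simple MNIST example. The seed key is
# required because full_cv reads it for the classifier; paths are
# illustrative only.
import json
import os

base_dir = os.path.join(os.path.expanduser('~'), 'scratch', 'mnist_cv')
config = {
	'ninputs': 784,
	'ncolumns': 784,
	'nactive': 30,
	'global_inhibition': True,
	'trim': False,
	'seed': 123456789,
	'disable_boost': True,
	'nsynapses': 392,
	'seg_th': 10,
	'syn_th': 0.5,
	'pinc': 0.001,
	'pdec': 0.002,
	'pwindow': 0.01,
	'random_permanence': True,
	'nepochs': 10,
	'log_dir': base_dir
}
with open(os.path.join(base_dir, 'config.json'), 'w') as f:
	json.dump(config, f, sort_keys=True, indent=4)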
Exemple #31
0
def base_experiment(pct_noise=0.15, noverlap_bits=0, exp_name='1-1',
	ntrials=10, verbose=True, seed=123456789):
	"""
	Run a single experiment, locally.
	
	@param pct_noise: The percentage of noise to add to the dataset.
	
	@param noverlap_bits: The number of bits the base class should overlap
	with the novelty class.
	
	@param exp_name: The name of the experiment.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param verbose: If True print the results.
	
	@param seed: The random seed to use.
	
	@return: A tuple containing the percentage errors for the SP's training
	and testing results and the SVM's training and testing results,
	respectively.
	"""
	
	# Base parameters
	ntrain, ntest = 800, 200
	nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
	clf_th = 0.5
	log_dir = os.path.join(os.path.expanduser('~'), 'scratch',
		'novelty_experiments', exp_name)
	
	# Configure the SP
	config = {
		'ninputs': 100,
		'trim': 1e-4,
		'disable_boost': True,
		'seed': seed,
		'pct_active': None,
		'random_permanence': True,
		'pwindow': 0.5,
		
		'global_inhibition': True,
		
		'ncolumns': 200,
		'nactive': 50,
		
		
		'nsynapses': 75,
		'seg_th': 15,
		
		'syn_th': 0.5,
		
		'pinc': 0.001,
		'pdec': 0.001,
		
		'nepochs': 10,
		
		'log_dir': log_dir
	}
	
	# Seed numpy
	np.random.seed(seed)
	
	# Create the base dataset
	x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
	x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]
	
	# Create the outlier dataset
	base_indexes = set(np.where(x_ds.base_class == 1)[0])
	choices = [x for x in xrange(nbits) if x not in base_indexes]
	outlier_base = np.zeros(nbits, dtype='bool')
	outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
		False)] = 1
	outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
	y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
	y_te = y_ds.data
	
	if verbose:
		print "\nBase class' test noise: {0:2.2f}".format(1 - (np.mean(x_te, 0)
			* x_ds.base_class.astype('i')).sum() / 40.)
		print "Outlier's class noise: {0:2.2f}".format(1 - (np.mean(y_te, 0) *
			outlier_base.astype('i')).sum() / 40.)
		print 'Overlap between two classes: {0}'.format(np.dot(
			x_ds.base_class.astype('i'), outlier_base.astype('i')))
	
	# Metrics
	metrics = SPMetrics()
	
	# Get the metrics for the datasets
	u_x_tr = metrics.compute_uniqueness(x_tr)
	o_x_tr = metrics.compute_overlap(x_tr)
	c_x_tr = 1 - metrics.compute_distance(x_tr)
	u_x_te = metrics.compute_uniqueness(x_te)
	o_x_te = metrics.compute_overlap(x_te)
	c_x_te = 1 - metrics.compute_distance(x_te)
	u_y_te = metrics.compute_uniqueness(y_te)
	o_y_te = metrics.compute_overlap(y_te)
	c_y_te = 1 - metrics.compute_distance(y_te)
	
	# Initialize the overall results
	sp_x_results = np.zeros(ntrials)
	sp_y_results = np.zeros(ntrials)
	svm_x_results = np.zeros(ntrials)
	svm_y_results = np.zeros(ntrials)
	
	# Iterate across the trials:
	for i in xrange(ntrials):
		# Make a new seed
		seed2 = np.random.randint(1000000)
		config['seed'] = seed2
		config['log_dir'] = '{0}-{1}'.format(log_dir, i + 1)
		
		# Create the SP
		sp = SPRegion(**config)
		
		# Fit the SP
		sp.fit(x_tr)
		
		# Get the SP's output
		sp_x_tr = sp.predict(x_tr)
		sp_x_te = sp.predict(x_te)
		sp_y_te = sp.predict(y_te)
		
		# Get the metrics for the SP's results
		u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
		o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
		c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
		u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
		o_sp_x_te = metrics.compute_overlap(sp_x_te)
		c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
		u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
		o_sp_y_te = metrics.compute_overlap(sp_y_te)
		c_sp_y_te = 1 - metrics.compute_distance(sp_y_te)
		
		# Log all of the metrics
		sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
		sp._log_stats('Input Base Class Train Overlap', o_x_tr)
		sp._log_stats('Input Base Class Train Correlation', c_x_tr)
		sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
		sp._log_stats('Input Base Class Test Overlap', o_x_te)
		sp._log_stats('Input Base Class Test Correlation', c_x_te)
		sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
		sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
		sp._log_stats('Input Novelty Class Test Correlation', c_y_te)	
		sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
		sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
		sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
		sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
		sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
		sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
		sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
		sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)
		sp._log_stats('SP Novelty Class Test Correlation', c_sp_y_te)
		
		# Print the results
		fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'
		if verbose:
			print '\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te'
			print fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr,
				u_sp_x_te, u_sp_y_te)
			print fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr, o_sp_x_te,
				o_sp_y_te)
			print fmt_s.format('Correlation', c_x_tr, c_x_te, c_y_te, c_sp_x_tr,
				c_sp_x_te, c_sp_y_te)
		
		# Get average representation of the base class
		sp_base_result = np.mean(sp_x_tr, 0)
		sp_base_result[sp_base_result >= 0.5] = 1
		sp_base_result[sp_base_result < 1] = 0
		
		# Averaged results for each metric type
		u_sp_base_to_x_te = 0.
		o_sp_base_to_x_te = 0.
		c_sp_base_to_x_te = 0.
		u_sp_base_to_y_te = 0.
		o_sp_base_to_y_te = 0.
		c_sp_base_to_y_te = 0.
		for x, y in zip(sp_x_te, sp_y_te):
			# Refactor
			xt = np.vstack((sp_base_result, x))
			yt = np.vstack((sp_base_result, y))
			
			# Compute the sums
			u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
			o_sp_base_to_x_te += metrics.compute_overlap(xt)
			c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
			u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
			o_sp_base_to_y_te += metrics.compute_overlap(yt)
			c_sp_base_to_y_te += 1 - metrics.compute_distance(yt)
		u_sp_base_to_x_te /= ntest
		o_sp_base_to_x_te /= ntest
		c_sp_base_to_x_te /= ntest
		u_sp_base_to_y_te /= ntest
		o_sp_base_to_y_te /= ntest
		c_sp_base_to_y_te /= ntest
		
		# Log the results
		sp._log_stats('Base Train to Base Test Uniqueness',
			u_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Correlation', c_sp_base_to_x_te)
		sp._log_stats('Base Train to Novelty Test Uniqueness',
			u_sp_base_to_y_te)
		sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)
		sp._log_stats('Base Train to Novelty Test Correlation',
			c_sp_base_to_y_te)
		
		# Print the results
		if verbose:
			print '\nDescription\tx_tr->x_te\tx_tr->y_te'
			print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(u_sp_base_to_x_te,
				u_sp_base_to_y_te)
			print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format(o_sp_base_to_x_te,
				o_sp_base_to_y_te)
			print 'Correlation:\t{0:2.4f}\t{1:2.4f}'.format(c_sp_base_to_x_te,
				c_sp_base_to_y_te)
		
		# Create an SVM
		clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)
		
		# Evaluate the SVM's performance
		clf.fit(x_tr)
		svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \
			100
		svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * \
			100
		
		# Perform classification using overlap as the feature
		# -- The overlap must be above 50%
		clf_x_te = 0.
		clf_y_te = 0.
		for x, y in zip(sp_x_te, sp_y_te):
			# Refactor
			xt = np.vstack((sp_base_result, x))
			yt = np.vstack((sp_base_result, y))
			
			# Compute the accuracy
			xo = metrics.compute_overlap(xt)
			yo = metrics.compute_overlap(yt)
			if xo >= clf_th: clf_x_te += 1
			if yo < clf_th: clf_y_te += 1
		clf_x_te = (clf_x_te / ntest) * 100
		clf_y_te = (clf_y_te / ntest) * 100
		
		# Store the results as errors
		sp_x_results[i] = 100 - clf_x_te
		sp_y_results[i] = 100 - clf_y_te
		svm_x_results[i] = 100 - svm_x_te
		svm_y_results[i] = 100 - svm_y_te
		
		# Log the results
		sp._log_stats('SP % Correct Base Class', clf_x_te)
		sp._log_stats('SP % Correct Novelty Class', clf_y_te)
		sp._log_stats('SVM % Correct Base Class', svm_x_te)
		sp._log_stats('SVM % Correct Novelty Class', svm_y_te)
		
		# Print the results
		if verbose:
			print '\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te)
			print 'SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te)
			print 'SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te)
			print 'SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te)
	
	return sp_x_results, sp_y_results, svm_x_results, svm_y_results
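
# Hypothetical sweep sketch: repeat the experiment while increasing the
# overlap between the base and novelty classes, reporting the mean SP and
# SVM errors. The experiment names are illustrative only.
for noverlap in (0, 5, 10, 20):
	sp_x, sp_y, svm_x, svm_y = base_experiment(noverlap_bits=noverlap,
		exp_name='overlap-{0}'.format(noverlap), verbose=False)
	print 'overlap={0}: SP err {1:2.2f}% / {2:2.2f}%, SVM err ' \
		'{3:2.2f}% / {4:2.2f}%'.format(noverlap, sp_x.mean(), sp_y.mean(),
		svm_x.mean(), svm_y.mean())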
Exemple #32
0
def main(ncols, npsyns, ninputs, density, seg_th, syn_th, ntrials=100,
	seed=123456789):
	"""
	Compare the theoretical to the experimental results.
	
	@param ncols: The number of columns.
	
	@param npsyns: The number of proximal synapses.
	
	@param ninputs: The number of inputs.
	
	@param density: The percentage of active bits in the input.
	
	@param seg_th: The threshold for a segment to become active.
	
	@param syn_th: The threshold at which synapses are connected.
	
	@param ntrials: The number of trials to perform for the experimental
	results.
	
	@param seed: The seed for the random number generator.
	"""
	
	print '**** THEORETICAL ****'
	print 'Probability that an input will be selected: {0:2.2f}%'.format(
		p_a1(npsyns, ninputs) * 100)
	p = p_c(ncols, npsyns, ninputs)
	print 'Probability of all inputs being selected: {0:2.2f}%'.format((1 - p)
		* 100)
	print 'Expected inputs not seen:', int(p * ninputs)
	print 'Expected number of columns connected to an input:', int(e_b(ncols,
		npsyns, ninputs))
	print 'Expected number of active synapses on a column: {0:2.2f}'.format(
		e_c(npsyns, density))
	print 'Expected number of active connected synapses on a column: ' \
		'{0:2.2f}'.format(e_d(npsyns, density, syn_th))
	print 'Expected number of columns with active inputs >= seg_th: {0:2.2f}' \
		.format(e_e(npsyns, density, ncols, seg_th))
	print 'Expected number of columns with active connected inputs >= ' \
		'seg_th: {0:2.2f}'.format(e_f(npsyns, density, ncols, seg_th,
		syn_th))
	
	# Prep the experimental
	print '\n**** Experimental ****'
	np.random.seed(seed)
	kargs = {
		'ninputs': ninputs,
		'ncolumns': ncols,
		'nsynapses': npsyns,
		'syn_th': syn_th,
		'seg_th': seg_th
	}
	
	#### Average number of active bits potentially connected to a column
	# Build input
	x = np.zeros(ninputs, dtype='bool')
	nactive = int(ninputs * (density))
	indexes = set(np.random.randint(0, ninputs, nactive))
	while len(indexes) != nactive:
		indexes.add(np.random.randint(0, ninputs))
	x[list(indexes)] = True
	
	# Simulate
	y0 = y1 = y2 = y3 = y4 = y5 = 0.
	for _ in xrange(ntrials):
		sp = SPRegion(**kargs)
		a = x[sp.syn_map].sum(1)
		b = (x[sp.syn_map] * (sp.p >= syn_th)).sum(1)
		y0 += ninputs - len(set(sp.syn_map.ravel()))
		y1 += np.mean(np.array([np.sum(sp.syn_map == i) for i in
			xrange(ninputs)]))
		y2 += a.mean()
		y3 += b.mean()
		y4 += (a >= seg_th).sum()
		y5 += (b >= seg_th).sum()
	print 'Average number of missing inputs: {0:.2f}'.format(y0 / ntrials)
	print 'Average number of columns connected to an input: {0:.2f}'.format(
		y1 / ntrials)
	print 'Average number of active inputs per column: {0:.2f}'.format(y2
		/ ntrials)
	print 'Average number of active connected inputs per column: {0:.2f}' \
		.format(y3 / ntrials)
	print 'Number of columns with active inputs >= seg_th {0:.2f}'.format(
		y4 / ntrials)
	print 'Number of columns with active connected inputs >= seg_th {0:.2f}' \
		.format(y5 / ntrials)
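
# The theoretical helpers (p_a1, p_c, e_b, ...) are defined elsewhere in the
# original script. Plausible closed forms for three of them, consistent with
# the experimental estimates above and stated here as assumptions rather
# than the library's actual definitions:
from scipy.stats import binom

def p_a1_sketch(npsyns, ninputs):
	"""P(a given input is chosen by at least one of npsyns draws)."""
	return 1 - (1 - 1. / ninputs) ** npsyns

def e_c_sketch(npsyns, density):
	"""Expected number of active (potential) synapses on a column."""
	return npsyns * density

def e_e_sketch(npsyns, density, ncols, seg_th):
	"""Expected number of columns with >= seg_th active inputs."""
	return ncols * binom.sf(seg_th - 1, npsyns, density)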
Exemple #33
0
def main():
	"""
	Program entry.
	
	Build an SP using SPDataset and see how it performs.
	"""
	
	# Params
	nsamples, nbits, pct_active = 500, 100, 0.4
	ncolumns = 300
	base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
	seed = 123456789
	kargs = {
		'ninputs': nbits,
		'ncolumns': ncolumns,
		'nactive': int(0.02 * ncolumns),
		'global_inhibition': True,
		'trim': 1e-4,
		'disable_boost': True,
		'seed': seed,
		
		'nsynapses': 20,
		'seg_th': 2,
		
		'syn_th': 0.5,
		'pinc': 0.01,
		'pdec': 0.01,
		'pwindow': 0.5,
		'random_permanence': True,
		
		'nepochs': 1,
		'log_dir': os.path.join(base_path, '1-1')
	}
	
	# Build items to store results
	npoints = 25
	pct_noises = np.linspace(0, pct_active / 2, npoints, False)
	uniqueness_sp, uniqueness_data = np.zeros(npoints), np.zeros(npoints)
	similarity_sp, similarity_data = np.zeros(npoints), np.zeros(npoints)
	similarity_sp1, similarity_data1 = np.zeros(npoints), np.zeros(npoints)
	similarity_sp0, similarity_data0 = np.zeros(npoints), np.zeros(npoints)
	dissimilarity_sp, dissimilarity_data = np.zeros(npoints), np.zeros(npoints)
	overlap_sp, overlap_data = np.zeros(npoints), np.zeros(npoints)
	correlation_sp, correlation_data = np.zeros(npoints), np.zeros(npoints)
	
	# Metrics
	metrics = SPMetrics()
	
	# Vary input noise
	for i, pct_noise in enumerate(pct_noises):
		print 'Iteration {0} of {1}'.format(i + 1, npoints)
		
		# Build the dataset
		ds = SPDataset(nsamples=nsamples, nbits=nbits, pct_active=pct_active,
			pct_noise=pct_noise, seed=seed)
		
		# Get the dataset stats
		uniqueness_data[i] = metrics.compute_uniqueness(ds.data)
		similarity_data[i] = metrics.compute_total_similarity(ds.data,
			confidence_interval=0.9)
		similarity_data1[i] = metrics.compute_one_similarity(ds.data,
			confidence_interval=0.9)
		similarity_data0[i] = metrics.compute_zero_similarity(ds.data,
			confidence_interval=0.9)
		dissimilarity_data[i] = metrics.compute_dissimilarity(ds.data,
			confidence_interval=0.9)
		overlap_data[i] = metrics.compute_overlap(ds.data)
		correlation_data[i] = 1 - metrics.compute_distance(ds.data)
		
		# Build the SP
		sp = SPRegion(**kargs)
		
		# Train the region
		sp.fit(ds.data)
		
		# Get the SP's output SDRs
		sp_output = sp.predict(ds.data)
		
		# Get the stats
		uniqueness_sp[i] = metrics.compute_uniqueness(sp_output)
		similarity_sp[i] = metrics.compute_total_similarity(sp_output,
			confidence_interval=0.9)
		similarity_sp1[i] = metrics.compute_one_similarity(sp_output,
			confidence_interval=0.9)
		similarity_sp0[i] = metrics.compute_zero_similarity(sp_output,
			confidence_interval=0.9)
		dissimilarity_sp[i] = metrics.compute_dissimilarity(sp_output,
			confidence_interval=0.9)
		overlap_sp[i] = metrics.compute_overlap(sp_output)
		correlation_sp[i] = 1 - metrics.compute_distance(sp_output)
	
	# Make some plots
	print 'Showing uniqueness - 0% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [uniqueness_data * 100,
		uniqueness_sp * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label='Uniqueness [%]', xlim=False, ylim=False,
		out_path=os.path.join(base_path, 'uniqueness.png'), show=True)
	print 'Showing total similarity - 100% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [similarity_data * 100,
		similarity_sp * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label='Total similarity [%]', xlim=False,
		ylim=False, out_path=os.path.join(base_path, 'similarity.png'),
		show=True)
	print 'Showing similarity of "1" bits - 100% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [similarity_data1 * 100,
		similarity_sp1 * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label="Similarity of '1's [%]", xlim=False,
		ylim=False, out_path=os.path.join(base_path, 'one_similarity.png'),
		show=True)
	print 'Showing similarity of "0" bits - 100% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [similarity_data0 * 100,
		similarity_sp0 * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label="Similarity of '0's [%]", xlim=False,
		ylim=False, out_path=os.path.join(base_path, 'zero_similarity.png'),
		show=True)
	print 'Showing dissimilarity - 0% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [dissimilarity_data * 100,
		dissimilarity_sp * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label="Dissimilarity [%]", xlim=False,
		ylim=False, out_path=os.path.join(base_path, 'dissimilarity.png'),
		show=True)
	print 'Showing average normalized overlap - 100% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [overlap_data * 100,
		overlap_sp * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label="% Normalized Overlap", xlim=False,
		ylim=False, out_path=os.path.join(base_path, 'overlap.png'),
		show=True)
	print 'Showing % average sample correlation coefficient - 100% is ideal'
	plot_line([pct_noises * 100, pct_noises * 100], [correlation_data * 100,
		correlation_sp * 100], series_names=('Raw Data', 'SP Output'),
		x_label='% Noise', y_label="% Correlation", xlim=False,
		ylim=False, out_path=os.path.join(base_path, 'correlation.png'),
		show=True)
	
	print '*** All data saved in "{0}" ***'.format(base_path)
Exemple #34
0
def base_experiment(log_dir, seed = 123456789):
	"""
	The base experiment.
	
	Build an SP using SPDataset and see how it performs.
	
	@param log_dir: The full path to the log directory.
	
	@param seed: The random seed to use.
	
	@return: Tuple containing: SP uniqueness, input uniqueness, SP overlap,
	input overlap.
	"""
	
	# Params
	nsamples, nbits, pct_active = 500, 100, 0.4
	kargs = {
		'ninputs': nbits,
		'ncolumns': 200,
		'nactive': 50,
		'global_inhibition': True,
		'trim': 1e-4,
		'disable_boost': True,
		'seed': seed,
		
		'nsynapses': 75,
		'seg_th': 15,
		
		'syn_th': 0.5,
		'pinc': 0.001,
		'pdec': 0.001,
		'pwindow': 0.5,
		'random_permanence': True,
		
		'nepochs': 10,
		'log_dir': log_dir
	}
	
	# Seed numpy
	np.random.seed(seed)
	
	# Build items to store results
	npoints = 11
	pct_noises = np.linspace(0, 1, npoints)
	u_sp, u_ip = np.zeros(npoints), np.zeros(npoints)
	o_sp, o_ip = np.zeros(npoints), np.zeros(npoints)
	
	# Metrics
	metrics = SPMetrics()
		
	# Vary input noise
	for i, pct_noise in enumerate(pct_noises):
		# Build the dataset
		ds = SPDataset(nsamples=nsamples, nbits=nbits, pct_active=pct_active,
			pct_noise=pct_noise, seed=seed)
		x = ds.data
		
		# Get the dataset stats
		u_ip[i] = metrics.compute_uniqueness(x) * 100
		o_ip[i] = metrics.compute_overlap(x) * 100
		
		# Build the SP
		sp = SPRegion(**kargs)
		
		# Train the region
		sp.fit(x)
		
		# Get the SP's output SDRs
		sp_output = sp.predict(x)
		
		# Get the stats
		u_sp[i] = metrics.compute_uniqueness(sp_output) * 100
		o_sp[i] = (metrics.compute_overlap(sp_output) +
			metrics.compute_overlap(np.logical_not(sp_output))) * 50
		
		# Log everything
		sp._log_stats('% Input Uniqueness', u_ip[i])
		sp._log_stats('% Input Overlap', o_ip[i])
		sp._log_stats('% SP Uniqueness', u_sp[i])
		sp._log_stats('% SP Overlap', o_sp[i])
	
	return u_sp, u_ip, o_sp, o_ip
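
# Hypothetical usage sketch: run the experiment and plot the uniqueness
# curves with the plot_line helper used by the other examples. The log
# directory is illustrative only.
import os

import numpy as np

log_dir = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_noise')
u_sp, u_ip, o_sp, o_ip = base_experiment(log_dir)
pct_noises = np.linspace(0, 1, len(u_sp)) * 100
plot_line([pct_noises, pct_noises], [u_ip, u_sp],
	series_names=('Raw Data', 'SP Output'), x_label='% Noise',
	y_label='Uniqueness [%]', xlim=False, ylim=(-5, 105),
	out_path=os.path.join(log_dir, 'uniqueness.png'), show=True)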
Exemple #35
0
def main(ds, p, ncols=2048, duty_cycle=100, nepochs=10, global_inhibition=True,
	seed=123456789):
	"""
	Run an experiment.
	
	@param ds: The dataset.
	
	@param p: The full path to the directory to save the results.
	
	@param ncols: The number of columns.
	
	@param duty_cycle: The duty cycle.
	
	@param nepochs: The number of epochs
	
	@param global_inhibition: If True use global inhibition otherwise use local
	inhibition.
	
	@param seed: The random seed.
	"""
	
	# Get some parameters
	ninputs = ds.shape[1]
	density = np.sum(ds[0]) / float(ninputs)
	
	# Make the directory if it doesn't exist
	try:
		os.makedirs(p)
	except OSError:
		pass
	
	# Initializations
	np.random.seed(seed)
	kargs = {
		'ninputs': ninputs,
		'ncolumns': ncols,
		'nsynapses': 40,
		'random_permanence': True,
		'pinc': 0.03,
		'pdec': 0.05,
		'seg_th': 15,
		'nactive': int(0.02 * ncols),
		'duty_cycle': duty_cycle,
		'max_boost': 10,
		'global_inhibition': global_inhibition,
		'trim': 1e-4
	}
	
	# Create the region
	delattr(SPRegion, '_phase3')
	setattr(SPRegion, '_phase3', _phase3)
	sp = SPRegion(**kargs)
	sp.iter, sp.out_path = 1, p
	
	# Train the region
	t = time.time()
	for i in xrange(nepochs):
		for j, x in enumerate(ds):
			sp.execute(x)
			sp.iter += 1
	t = time.time() - t
	
	# Dump the details
	kargs['density'] = density
	kargs['seed'] = seed
	kargs['nepochs'] = nepochs
	kargs['time'] = t
	with open(os.path.join(p, 'details.json'), 'wb') as f:
		f.write(json.dumps(kargs, sort_keys=True, indent=4,
			separators=(',', ': ')))