def go(kargs, type):
    """
    Execute the SP and dump its permanence matrix and run details to disk.

    The SP is rebuilt and executed n_iters times and only the last instance
    is kept. This function relies on the names n_iters, ds, p, density and
    seed, none of which are defined inside this block (presumably module
    level globals - confirm against the rest of the file).

    @param kargs: The params for the SP. This dictionary is not modified.

    @param type: Run type. Used as the prefix of the output file names.

    @return: The new sp and the execution time (total across all
    iterations).
    """

    # Run the SP
    t = time.time()
    for _ in xrange(n_iters):
        sp = SPRegion(**kargs)
        sp.c_sboost = 0  # Ensure that no permanence boosting occurs
        sp.execute(ds, store=False)
    t = time.time() - t

    # Dump the permanence matrix
    with open(os.path.join(p, '{0}-permanence.pkl'.format(type)), 'wb') as f:
        cPickle.dump(sp.p, f, cPickle.HIGHEST_PROTOCOL)

    # Dump the details. The extra entries are added to a copy so that the
    # caller's dictionary still only holds SP parameters afterwards.
    details = dict(kargs)
    details['density'] = density
    details['seed'] = seed
    details['time'] = t
    with open(os.path.join(p, '{0}-details.json'.format(type)), 'wb') as f:
        f.write(json.dumps(details, sort_keys=True, indent=4,
                           separators=(',', ': ')))

    return sp, t
def main(): print "Early fused binary SDRs processing" hl.tic() # Parameters to construct cortical structure nbits, pct_active, nobjs = 2048, 0.4, 51 nofcols = 2048 # Binary path bin_path_efussion = '/home/neurobot/Datasets/mmodal_washington_io/efusion/' befusion_header, sefusion_header = 'befusion', 'sefusion' # Spool path spool_efus = '/home/neurobot/Datasets/spool_washington_io/mm_early_spool/' kargs = { 'ninputs': nbits, 'ncolumns': nofcols, 'nactive': int(nbits * 0.2), 'global_inhibition': True, 'trim': 1e-4, 'disable_boost': True, 'seed': seed, 'nsynapses': 100, 'seg_th': 10, 'syn_th': 0.5, 'pinc': 0.001, 'pdec': 0.001, 'pwindow': 0.5, 'random_permanence': True, 'nepochs': 10 } sp = SPRegion(**kargs) # Change the path according to modality type data_path, data_header = bin_path_efussion, befusion_header sdr_path, sdr_header = spool_efus, sefusion_header for j in range(nobjs): obj_str = str(j + 1) obj_path = data_path + obj_str + '.mat' data_content = hl.extract_mat_content(obj_path, data_header) num_of_imgs, length = data_content.shape sdrs = np.zeros((num_of_imgs, nbits), dtype=np.int64) for i in range(num_of_imgs): sp.fit(data_content[i]) sp_output = sp.predict(data_content[i]) outp = sp_output * 1 if np.count_nonzero(outp) != int(nbits * 0.2): print j + 1, i, np.count_nonzero(outp) sdrs[i, :] = outp sdr_mat = sdr_path + str(j + 1) + '.mat' sio.savemat(sdr_mat, mdict={sdr_header: sdrs}) print "Spooling for object ", j + 1, " tooks" hl.tac() print "----------------------------------" print "Finished spooling for all objects" hl.tac()
def second_level(log_dir, seed=123456789): # Get the paths to the data paths = [] for d in os.listdir(log_dir): p = os.path.join(log_dir, d) if os.path.isdir(p): paths.append(os.path.join(p, 'predictions.pkl')) paths = sorted(paths)[:16] # Read in the first item, to determine the shape of the data tr, te = SPRegion.load_data(paths[0]) ntrain, ntest = len(tr), len(te) n_base_cols = tr.shape[-1] ninputs = n_base_cols * len(paths) # Read in all of the data tr_x = np.zeros((ntrain, ninputs), dtype='bool') te_x = np.zeros((ntest, ninputs), dtype='bool') for i, p in enumerate(paths): tr, te = SPRegion.load_data(p) tr_x[:, i * n_base_cols:(i + 1) * n_base_cols] = tr te_x[:, i * n_base_cols:(i + 1) * n_base_cols] = te # Read in the labels tr_y, te_y = SPRegion.load_data(os.path.join(log_dir, 'labels.pkl')) # SP arguments ncolumns = 4096 kargs = { 'ninputs': ninputs, 'ncolumns': ncolumns, 'nactive': int(ncolumns * 0.2), 'global_inhibition': True, 'trim': 1e-4, 'disable_boost': True, 'seed': seed, 'nsynapses': 100, 'seg_th': 0, 'syn_th': 0.5, 'pinc': 0.001, 'pdec': 0.001, 'pwindow': 0.5, 'random_permanence': True, 'nepochs': 10, 'log_dir': os.path.join(log_dir, '2-1'), 'clf': LinearSVC(random_state=seed) } # Create the SP sp = SPRegion(**kargs) # Train the SP sp.fit(tr_x, tr_y) # Score the SP print sp.score(te_x, te_y)
def _main3(params, x):
    """
    Use by main3 to do the SP training in parallel.

    The prediction returned is the mean of the SP's output over the first
    axis, thresholded so that values of at least 0.5 become 1 and all other
    values become 0.

    @param params: The configuration parameters for the SP.

    @param x: The data to train the SP on.

    @return: The SP instance, as well as its predictions on the training
    data.
    """

    region = SPRegion(**params)
    region.fit(x)

    # Average the output, then binarize it in place
    averaged = np.mean(region.predict(x), 0)
    is_active = averaged >= 0.5
    averaged[is_active] = 1
    averaged[averaged < 1] = 0

    return region, averaged
def main(ntrain=800, ntest=200, nsplits=1, seed=1234567):
    """
    Train an SP on MNIST and plot how well it reconstructs its inputs.

    For every CV fold an SP is trained, one random training sample per digit
    is selected, and the original images, the SP's output and the SP's
    reconstruction of the input are plotted side by side. The plot is saved
    as 'input_reconstruction.png' inside the SP's log directory.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use.

    @param seed: The seed for the random number generators.
    """

    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 10,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'disable_boost': True,
        'nsynapses': 392,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,
        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold
    for tr, te in cv:
        # Extract the training data of this fold once, rather than on every
        # use
        fold_x, fold_y = x[tr], y[tr]

        # Create and train the region
        sp = SPRegion(**kargs)
        sp.fit(fold_x, fold_y)

        # Get a random input for each digit from the training set. The loop
        # variable is deliberately distinct from any fold level name.
        inputs = np.zeros((10, ninputs))
        for digit in xrange(10):
            ix = np.random.permutation(np.where(fold_y == digit)[0])[0]
            inputs[digit] = fold_x[ix]

        # Get the SP's predictions for the inputs
        sp_pred = sp.predict(inputs)

        # Get the reconstruction in the context of the SP
        sp_inputs = sp.reconstruct_input(sp_pred)

        # Make a plot comparing the images
        shape = (28, 28)
        path = os.path.join(sp.log_dir, 'input_reconstruction.png')
        plot_compare_images((inputs, sp_pred, sp_inputs), shape,
                            out_path=path)
def one_cv(base_dir, cv_split):
    """
    Run the MNIST experiment for a single CV split.

    The SP is trained on the split's training data and scored using the
    column, probabilistic and dimensionality reduction methods. The number
    of dimensions after reduction is logged as well.

    @param base_dir: The full path to the base directory. This directory
    should contain the config as well as the pickled data.

    @param cv_split: The index for the CV split. The split is looked up at
    position cv_split - 1 of the pickled CV splits.
    """

    # Build the SP's keyword arguments from the split's config
    config_path = os.path.join(base_dir, 'config-{0}.json'.format(cv_split))
    with open(config_path, 'rb') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Merge the MNIST training and testing sets
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Select the requested split
    with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
        splits = cPickle.load(f)
    tr, te = splits[cv_split - 1]

    # Remove the split directory, if it exists
    shutil.rmtree(os.path.join(base_dir, str(cv_split)), True)

    # Execute
    region = SPRegion(**kargs)
    region.fit(x[tr], y[tr])

    # Column accuracy
    region.score(x[te], y[te])

    # Probabilistic accuracy, followed by the dimensionality reduction method
    for method in ('prob', 'reduction'):
        region.score(x[te], y[te], tr_x=x[tr], score_method=method)

    reduced = region.reduce_dimensions(x[0])
    region._log_stats('Number of New Dimensions', len(reduced))
def full_mnist(base_dir, new_dir, auto_update=False):
    """
    Execute a full MNIST run using the parameters in the config file.

    The SP is trained on the MNIST training set and scored on the testing
    set using the column, probabilistic and dimensionality reduction
    methods. The number of dimensions after reduction is logged as well.

    @param base_dir: The full path to the base directory. This directory
    should contain the config.

    @param new_dir: The full path of where the data should be saved.

    @param auto_update: If True the permanence increment and decrement
    amounts will automatically be computed by the runner. If False, the ones
    specified in the config file will be used.
    """

    # Build the SP's keyword arguments
    with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
        kargs = json.load(f)
    kargs['log_dir'] = new_dir
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    if auto_update:
        # Mean, across all training instances, of each instance's sum
        mean_sum = tr_x.sum(1).mean()

        # Average probability of an input being active
        p_active = mean_sum / float(tr_x.shape[1])

        # Scale the update amounts by the inverse of the mean sum
        scale = 1 / mean_sum
        kargs['pinc'] = scale * (1 / p_active)
        kargs['pdec'] = scale * (1 / (1 - p_active))

    # Execute
    region = SPRegion(**kargs)
    region.fit(tr_x, tr_y)

    # Column accuracy
    region.score(te_x, te_y)

    # Probabilistic accuracy, followed by the dimensionality reduction method
    for method in ('prob', 'reduction'):
        region.score(te_x, te_y, tr_x=tr_x, score_method=method)

    reduced = region.reduce_dimensions(tr_x[0])
    region._log_stats('Number of New Dimensions', len(reduced))
def main_local(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
               global_inhibition=True, ncores=4, seed=None):
    """
    Perform CV on a subset of the MNIST dataset. Performs parallelizations on
    a local machine.

    The parameter values and scores of every iteration are pickled to
    'cv_results.pkl' and the raw search results to 'cv_clf.pkl', both inside
    log_dir.

    @param log_dir: The directory to store the results in.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param niter: The number of parameter iterations to use.

    @param nsplits: The number of splits of the data to use.

    @param global_inhibition: If True use global inhibition; otherwise, use
    local inhibition.
    NOTE(review): this argument is not referenced anywhere in this function
    - confirm whether it should be forwarded to main().

    @param ncores: The number of cores to use.

    @param seed: The seed for the random number generators.
    """

    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
                                   seed)

    # Configure the randomized search used for doing CV
    search_args = {
        'estimator': SPRegion(**kargs),
        'param_distributions': params,
        'n_iter': niter,               # Total runs
        'n_jobs': ncores,              # Use this many number of cores
        'pre_dispatch': 1 * ncores,    # Jobs dispatched up front
        'iid': True,                   # Data is iid across folds
        'cv': cv,                      # The CV split for the data
        'refit': False,                # Do not refit on the full dataset
        'random_state': seed           # Force same SP across runs
    }
    search = RandomizedSearchCV(**search_args)

    # Fit the models
    search.fit(x, y)

    # Every parameter but the log directory is stored
    parameter_names = sorted(search.grid_scores_[0].parameters.keys())
    parameter_names.remove('log_dir')

    # Extract the CV results, one row per iteration
    parameter_values = np.zeros((niter, len(parameter_names)))
    results = np.zeros((niter, nsplits))
    for row, grid_score in enumerate(search.grid_scores_):
        values = [grid_score.parameters[name] for name in parameter_names]
        parameter_values[row] = np.array(values)
        results[row] = grid_score.cv_validation_scores

    # Save the CV results
    with open(os.path.join(log_dir, 'cv_results.pkl'), 'wb') as f:
        cPickle.dump((parameter_names, parameter_values, results), f,
                     cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join(log_dir, 'cv_clf.pkl'), 'wb') as f:
        cPickle.dump(
            (search.grid_scores_, search.best_score_, search.best_params_),
            f, cPickle.HIGHEST_PROTOCOL)
def full_cv(base_dir):
    """
    Run the MNIST experiment. Each CV split is executed sequentially.

    For every split a new SP is trained and scored using the column,
    probabilistic and dimensionality reduction methods. The number of
    dimensions after reduction is logged as well.

    @param base_dir: The full path to the base directory. This directory
    should contain the config as well as the pickled data.
    """

    # Build the SP's keyword arguments
    with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Merge the MNIST training and testing sets
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Get the CV splits
    with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
        splits = cPickle.load(f)

    # Execute each run
    for tr, te in splits:
        region = SPRegion(**kargs)
        region.fit(x[tr], y[tr])

        # Column accuracy
        region.score(x[te], y[te])

        # Probabilistic accuracy, followed by the dimensionality reduction
        # method
        for method in ('prob', 'reduction'):
            region.score(x[te], y[te], tr_x=x[tr], score_method=method)

        reduced = region.reduce_dimensions(x[0])
        region._log_stats('Number of New Dimensions', len(reduced))
def base_experiment(log_dir, seed=123456789):
    """
    The base experiment. Build an SP using SPDataset and see how it performs
    as the amount of noise in the dataset is varied from 0 to 1 in 11 steps.

    @param log_dir: The full path to the log directory.

    @param seed: The random seed to use.

    @return: Tuple containing: SP uniqueness, input uniqueness, SP overlap,
    input overlap. Each item is an array with one percentage per noise
    level.
    """

    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Build items to store results
    npoints = 11
    pct_noises = np.linspace(0, 1, npoints)
    u_sp = np.zeros(npoints)
    u_ip = np.zeros(npoints)
    o_sp = np.zeros(npoints)
    o_ip = np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for ix, noise in enumerate(pct_noises):
        # Build the dataset
        dataset = SPDataset(nsamples=nsamples, nbits=nbits,
                            pct_active=pct_active, pct_noise=noise,
                            seed=seed)
        samples = dataset.data

        # Get the dataset stats
        u_ip[ix] = metrics.compute_uniqueness(samples) * 100
        o_ip[ix] = metrics.compute_overlap(samples) * 100

        # Build and train the SP, then get its output SDRs
        region = SPRegion(**kargs)
        region.fit(samples)
        sdrs = region.predict(samples)

        # Get the stats. The overlap is the mean of the overlap of the
        # output and the overlap of its negation, expressed as a percentage.
        u_sp[ix] = metrics.compute_uniqueness(sdrs) * 100
        o_sp[ix] = (metrics.compute_overlap(sdrs) +
                    metrics.compute_overlap(np.logical_not(sdrs))) * 50

        # Log everything
        for label, value in (('% Input Uniqueness', u_ip[ix]),
                             ('% Input Overlap', o_ip[ix]),
                             ('% SP Uniqueness', u_sp[ix]),
                             ('% SP Overlap', o_sp[ix])):
            region._log_stats(label, value)

    return u_sp, u_ip, o_sp, o_ip
def run_single_experiment(base_dir, ntrials=10, seed=123456789):
    """
    Run the actual experiment.

    One SP is built, trained and evaluated per seed. The metrics of the
    input data are read from disk; the metrics of the SP's output are
    computed. All of them are logged through the SP.

    @param base_dir: The directory to containing the experiment to be run.

    @param ntrials: The number of trials to perform with different seeds.

    @param seed: The initial seed used to generate the other random seeds.
    """

    # Generate the number of requested seeds
    trial_seeds = generate_seeds(ntrials, seed)

    # Get the configuration
    with open(os.path.join(base_dir, 'config.json'), 'r') as f:
        config = json.load(f)

    # The dataset file holds two consecutive pickles: the data, followed by
    # the base metric data
    with open(os.path.join(base_dir, 'dataset.pkl'), 'rb') as f:
        data = pickle.load(f)
        uniqueness_data, overlap_data, correlation_data = pickle.load(f)

    # Metrics
    metrics = SPMetrics()

    # Execute each run
    for trial_seed in trial_seeds:
        # Build and train an SP using this trial's seed
        config['seed'] = trial_seed
        region = SPRegion(**config)
        region.fit(data)

        # Get the SP's output
        sdrs = region.predict(data)

        # Log the metrics of the input
        for label, value in (('Input Uniqueness', uniqueness_data),
                             ('Input Overlap', overlap_data),
                             ('Input Correlation', correlation_data)):
            region._log_stats(label, value)

        # Log the metrics of the SP
        region._log_stats('SP Uniqueness', metrics.compute_uniqueness(sdrs))
        region._log_stats('SP Overlap', metrics.compute_overlap(sdrs))
        region._log_stats('SP Correlation',
                          1 - metrics.compute_distance(sdrs))
def base_experiment(pct_noise=0.15, noverlap_bits=0, exp_name='1-1', ntrials=10, verbose=True, seed=123456789): """ Run a single experiment, locally. @param pct_noise: The percentage of noise to add to the dataset. @param noverlap_bits: The number of bits the base class should overlap with the novelty class. @param exp_name: The name of the experiment. @param ntrials: The number of times to repeat the experiment. @param verbose: If True print the results. @param seed: The random seed to use. @return: A tuple containing the percentage errors for the SP's training and testing results and the SVM's training and testing results, respectively. """ # Base parameters ntrain, ntest = 800, 200 nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4 clf_th = 0.5 log_dir = os.path.join(os.path.expanduser('~'), 'scratch', 'novelty_experiments', exp_name) # Configure the SP config = { 'ninputs': 100, 'trim': 1e-4, 'disable_boost': True, 'seed': seed, 'pct_active': None, 'random_permanence': True, 'pwindow': 0.5, 'global_inhibition': True, 'ncolumns': 200, 'nactive': 50, 'nsynapses': 75, 'seg_th': 15, 'syn_th': 0.5, 'pinc': 0.001, 'pdec': 0.001, 'nepochs': 10, 'log_dir': log_dir } # Seed numpy np.random.seed(seed) # Create the base dataset x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed) x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:] # Create the outlier dataset base_indexes = set(np.where(x_ds.base_class == 1)[0]) choices = [x for x in xrange(nbits) if x not in base_indexes] outlier_base = np.zeros(nbits, dtype='bool') outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits, False)] = 1 outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1 y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed) y_te = y_ds.data if verbose: print "\nBase class' test noise: {0:2.2f}".format( 1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.) 
print "Outlier's class noise: {0:2.2f}".format( 1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.) print 'Overlap between two classes: {0}'.format( np.dot(x_ds.base_class.astype('i'), outlier_base.astype('i'))) # Metrics metrics = SPMetrics() # Get the metrics for the datasets u_x_tr = metrics.compute_uniqueness(x_tr) o_x_tr = metrics.compute_overlap(x_tr) c_x_tr = 1 - metrics.compute_distance(x_tr) u_x_te = metrics.compute_uniqueness(x_te) o_x_te = metrics.compute_overlap(x_te) c_x_te = 1 - metrics.compute_distance(x_te) u_y_te = metrics.compute_uniqueness(y_te) o_y_te = metrics.compute_overlap(y_te) c_y_te = 1 - metrics.compute_distance(y_te) # Initialize the overall results sp_x_results = np.zeros(ntrials) sp_y_results = np.zeros(ntrials) svm_x_results = np.zeros(ntrials) svm_y_results = np.zeros(ntrials) # Iterate across the trials: for i in xrange(ntrials): # Make a new seed seed2 = np.random.randint(1000000) config['seed'] = seed2 config['log_dir'] = '{0}-{1}'.format(log_dir, i + 1) # Create the SP sp = SPRegion(**config) # Fit the SP sp.fit(x_tr) # Get the SP's output sp_x_tr = sp.predict(x_tr) sp_x_te = sp.predict(x_te) sp_y_te = sp.predict(y_te) # Get the metrics for the SP's results u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr) o_sp_x_tr = metrics.compute_overlap(sp_x_tr) c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr) u_sp_x_te = metrics.compute_uniqueness(sp_x_te) o_sp_x_te = metrics.compute_overlap(sp_x_te) c_sp_x_te = 1 - metrics.compute_distance(sp_x_te) u_sp_y_te = metrics.compute_uniqueness(sp_y_te) o_sp_y_te = metrics.compute_overlap(sp_y_te) c_sp_y_te = 1 - metrics.compute_distance(sp_y_te) # Log all of the metrics sp._log_stats('Input Base Class Train Uniqueness', u_x_tr) sp._log_stats('Input Base Class Train Overlap', o_x_tr) sp._log_stats('Input Base Class Train Correlation', c_x_tr) sp._log_stats('Input Base Class Test Uniqueness', u_x_te) sp._log_stats('Input Base Class Test Overlap', o_x_te) sp._log_stats('Input Base Class 
Test Correlation', c_x_te) sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te) sp._log_stats('Input Novelty Class Test Overlap', o_y_te) sp._log_stats('Input Novelty Class Test Correlation', c_y_te) sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr) sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr) sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr) sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te) sp._log_stats('SP Base Class Test Overlap', o_sp_x_te) sp._log_stats('SP Base Class Test Correlation', c_sp_x_te) sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te) sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te) sp._log_stats('SP Novelty Class Test Correlation', c_sp_y_te) # Print the results fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{5:2.4f}' if verbose: print '\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te' print fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr, u_sp_x_te, u_sp_y_te) print fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr, o_sp_x_te, o_sp_y_te) print fmt_s.format('Correlation', c_x_tr, c_x_te, c_y_te, c_sp_x_tr, c_sp_x_te, c_sp_y_te) # Get average representation of the base class sp_base_result = np.mean(sp_x_tr, 0) sp_base_result[sp_base_result >= 0.5] = 1 sp_base_result[sp_base_result < 1] = 0 # Averaged results for each metric type u_sp_base_to_x_te = 0. o_sp_base_to_x_te = 0. c_sp_base_to_x_te = 0. u_sp_base_to_y_te = 0. o_sp_base_to_y_te = 0. c_sp_base_to_y_te = 0. 
for x, y in zip(sp_x_te, sp_y_te): # Refactor xt = np.vstack((sp_base_result, x)) yt = np.vstack((sp_base_result, y)) # Compute the sums u_sp_base_to_x_te += metrics.compute_uniqueness(xt) o_sp_base_to_x_te += metrics.compute_overlap(xt) c_sp_base_to_x_te += 1 - metrics.compute_distance(xt) u_sp_base_to_y_te += metrics.compute_uniqueness(yt) o_sp_base_to_y_te += metrics.compute_overlap(yt) c_sp_base_to_y_te += 1 - metrics.compute_distance(yt) u_sp_base_to_x_te /= ntest o_sp_base_to_x_te /= ntest c_sp_base_to_x_te /= ntest u_sp_base_to_y_te /= ntest o_sp_base_to_y_te /= ntest c_sp_base_to_y_te /= ntest # Log the results sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te) sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te) sp._log_stats('Base Train to Base Test Correlation', c_sp_base_to_x_te) sp._log_stats('Base Train to Novelty Test Uniqueness', u_sp_base_to_y_te) sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te) sp._log_stats('Base Train to Novelty Test Correlation', c_sp_base_to_y_te) # Print the results if verbose: print '\nDescription\tx_tr->x_te\tx_tr->y_te' print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format( u_sp_base_to_x_te, u_sp_base_to_y_te) print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format( o_sp_base_to_x_te, o_sp_base_to_y_te) print 'Correlation:\t{0:2.4f}\t{1:2.4f}'.format( c_sp_base_to_x_te, c_sp_base_to_y_te) # Create an SVM clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2) # Evaluate the SVM's performance clf.fit(x_tr) svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \ 100 svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * \ 100 # Perform classification using overlap as the feature # -- The overlap must be above 50% clf_x_te = 0. clf_y_te = 0. 
for x, y in zip(sp_x_te, sp_y_te): # Refactor xt = np.vstack((sp_base_result, x)) yt = np.vstack((sp_base_result, y)) # Compute the accuracy xo = metrics.compute_overlap(xt) yo = metrics.compute_overlap(yt) if xo >= clf_th: clf_x_te += 1 if yo < clf_th: clf_y_te += 1 clf_x_te = (clf_x_te / ntest) * 100 clf_y_te = (clf_y_te / ntest) * 100 # Store the results as errors sp_x_results[i] = 100 - clf_x_te sp_y_results[i] = 100 - clf_y_te svm_x_results[i] = 100 - svm_x_te svm_y_results[i] = 100 - svm_y_te # Log the results sp._log_stats('SP % Correct Base Class', clf_x_te) sp._log_stats('SP % Correct Novelty Class', clf_y_te) sp._log_stats('SVM % Correct Base Class', svm_x_te) sp._log_stats('SVM % Correct Novelty Class', svm_y_te) # Print the results if verbose: print '\nSP Base Class Detection : {0:2.2f}%'.format(clf_x_te) print 'SP Novelty Class Detection : {0:2.2f}%'.format(clf_y_te) print 'SVM Base Class Detection : {0:2.2f}%'.format(svm_x_te) print 'SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te) return sp_x_results, sp_y_results, svm_x_results, svm_y_results
def fit_grid(): """ Use a grid technique with many SPs. """ p = 'results\\mnist_filter' # try: # os.makedirs(p) # except OSError: # pass np.random.seed(123456789) # kargs = { # 'ninputs': 9, # 'ncolumns': 100, # 'nsynapses': 5, # 'random_permanence': True, # 'pinc':0.03, 'pdec':0.05, # 'seg_th': 3, # 'nactive': 10, # 'duty_cycle': 100, # 'max_boost': 10, # 'global_inhibition': True, # 'trim': 1e-4 # } kargs2 = { 'ninputs': 100 * (26 ** 2), 'ncolumns': 2048, 'nsynapses': 1000, 'random_permanence': True, 'pinc':0.03, 'pdec':0.05, 'seg_th': 5, 'nactive': 20, 'duty_cycle': 100, 'max_boost': 10, 'global_inhibition': True, 'trim': 1e-4 } # Get the data (tr_x, tr_y), (te_x, te_y) = get_data() nwindows = 26 ** 2 # # Make the SPs # sps = [SPRegion(**kargs) for _ in xrange(nwindows)] # # Train the SPs # nepochs = 10 # t = time.time() # for i in xrange(nepochs): # print i # for j, x in enumerate(tr_x): # print '\t{0}'.format(j) # nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape( # nwindows, 9) # for xi, sp in izip(nx, sps): # sp.step(xi) # t1 = time.time() - t # print t1 # # Save this batch of SPs # for i, sp in enumerate(sps): # sp.learn = False # sp.save(os.path.join(p, 'sp0-{0}.pkl'.format(i))) # Make the top level SP sp2 = SPRegion(**kargs2) # Get the SPs sps = [load(os.path.join(p, sp)) for sp in os.listdir(p) if sp[2] == '0'] # Train the top SP nepochs = 10 t = time.time() for i in xrange(nepochs): print i for j, x in enumerate(tr_x): print '\t{0}'.format(j) nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape( nwindows, 9) output = np.array(np.zeros(100 * nwindows), dtype='bool') for k, (xi, sp) in enumerate(izip(nx, sps)): sp.step(xi) output[k*100:(k*100)+100] = sp.y[:, 0] sp2.step(output) t2 = time.time() - t print t2 # Save the top SP sp2.learn = False sp2.save(os.path.join(p, 'sp1-0.pkl'))
def first_level(log_dir, ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """
    Train one SP per image window on a subset of MNIST.

    Each 28x28 image is split into windows of 7x7 pixels. The labels of the
    selected samples are pickled to 'labels.pkl' inside log_dir and the SPs
    are then executed in parallel, one per window.

    @param log_dir: The full path to the log directory.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use.

    @param seed: The random seed to use.
    """

    # Details of the filter
    win_size = 7
    total_win_size = win_size * win_size
    nwindows = 16

    # SP arguments
    kargs = {
        'ninputs': total_win_size,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 35,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(log_dir, '1-1')
    }

    # Merge the MNIST training and testing sets
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Take the first CV split, limited to the requested number of samples
    tr, te = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed).gen.next()
    tr = tr[:ntrain]
    te = te[:ntest]

    # Store the labels to disk
    with open(os.path.join(log_dir, 'labels.pkl'), 'wb') as f:
        cPickle.dump((y[tr], y[te]), f, cPickle.HIGHEST_PROTOCOL)
    del tr_y, te_y, y

    # Build the training and testing data, indexed by window and then by
    # sample
    train_data = np.zeros((nwindows, ntrain, total_win_size), dtype='bool')
    test_data = np.zeros((nwindows, ntest, total_win_size), dtype='bool')
    for windowed, indexes, count in ((train_data, tr, ntrain),
                                     (test_data, te, ntest)):
        for i in xrange(count):
            image = x[indexes[i]].reshape(28, 28)
            for j, window in enumerate(get_windows(image, win_size)):
                windowed[j, i] = window
    del tr_x, te_x, x

    # Make the SPs
    sps = [SPRegion(**kargs) for _ in xrange(nwindows)]

    # Execute the SPs in parallel
    jobs = (delayed(execute)(region, win_tr, win_te)
            for region, win_tr, win_te in izip(sps, train_data, test_data))
    Parallel(n_jobs=-1)(jobs)
def fit_grid(): """ Use a grid technique with many SPs. """ p = 'results\\mnist_filter' # try: # os.makedirs(p) # except OSError: # pass np.random.seed(123456789) # kargs = { # 'ninputs': 9, # 'ncolumns': 100, # 'nsynapses': 5, # 'random_permanence': True, # 'pinc':0.03, 'pdec':0.05, # 'seg_th': 3, # 'nactive': 10, # 'duty_cycle': 100, # 'max_boost': 10, # 'global_inhibition': True, # 'trim': 1e-4 # } kargs2 = { 'ninputs': 100 * (26**2), 'ncolumns': 2048, 'nsynapses': 1000, 'random_permanence': True, 'pinc': 0.03, 'pdec': 0.05, 'seg_th': 5, 'nactive': 20, 'duty_cycle': 100, 'max_boost': 10, 'global_inhibition': True, 'trim': 1e-4 } # Get the data (tr_x, tr_y), (te_x, te_y) = get_data() nwindows = 26**2 # # Make the SPs # sps = [SPRegion(**kargs) for _ in xrange(nwindows)] # # Train the SPs # nepochs = 10 # t = time.time() # for i in xrange(nepochs): # print i # for j, x in enumerate(tr_x): # print '\t{0}'.format(j) # nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape( # nwindows, 9) # for xi, sp in izip(nx, sps): # sp.step(xi) # t1 = time.time() - t # print t1 # # Save this batch of SPs # for i, sp in enumerate(sps): # sp.learn = False # sp.save(os.path.join(p, 'sp0-{0}.pkl'.format(i))) # Make the top level SP sp2 = SPRegion(**kargs2) # Get the SPs sps = [load(os.path.join(p, sp)) for sp in os.listdir(p) if sp[2] == '0'] # Train the top SP nepochs = 10 t = time.time() for i in xrange(nepochs): print i for j, x in enumerate(tr_x): print '\t{0}'.format(j) nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape(nwindows, 9) output = np.array(np.zeros(100 * nwindows), dtype='bool') for k, (xi, sp) in enumerate(izip(nx, sps)): sp.step(xi) output[k * 100:(k * 100) + 100] = sp.y[:, 0] sp2.step(output) t2 = time.time() - t print t2 # Save the top SP sp2.learn = False sp2.save(os.path.join(p, 'sp1-0.pkl'))
def base_experiment(config, pct_noise=0.15, noverlap_bits=0, ntrials=10, verbose=False, seed=123456789): """ Run a single experiment, locally. @param config: The configuration parameters. @param pct_noise: The percentage of noise to add to the dataset. @param noverlap_bits: The number of bits the base class should overlap with the novelty class. @param ntrials: The number of times to repeat the experiment. @param verbose: If True print the results. @param seed: The random seed to use. """ # Base parameters ntrain, ntest = 800, 200 nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4 clf_th = 0.5 # Build the directory, if needed base_dir = config['log_dir'] if not os.path.exists(base_dir): os.makedirs(base_dir) # Seed numpy np.random.seed(seed) # Create the base dataset x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed) x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:] # Create the outlier dataset base_indexes = set(np.where(x_ds.base_class == 1)[0]) choices = [x for x in xrange(nbits) if x not in base_indexes] outlier_base = np.zeros(nbits, dtype='bool') outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits, False)] = 1 outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1 y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed) y_te = y_ds.data if verbose: print "\nBase class' test noise: {0:2.2f}".format(1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.) print "Outlier's class noise: {0:2.2f}".format(1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.) 
print 'Overlap between two classes: {0}'.format(np.dot( x_ds.base_class.astype('i'), outlier_base.astype('i'))) # Metrics metrics = SPMetrics() # Get the metrics for the datasets u_x_tr = metrics.compute_uniqueness(x_tr) o_x_tr = metrics.compute_overlap(x_tr) u_x_te = metrics.compute_uniqueness(x_te) o_x_te = metrics.compute_overlap(x_te) u_y_te = metrics.compute_uniqueness(y_te) o_y_te = metrics.compute_overlap(y_te) # Initialize the overall results sp_x_results = np.zeros(ntrials) sp_y_results = np.zeros(ntrials) svm_x_results = np.zeros(ntrials) svm_y_results = np.zeros(ntrials) # Iterate across the trials: for i, seed2 in enumerate(generate_seeds(ntrials, seed)): # Create the SP config['seed'] = seed2 sp = SPRegion(**config) # Fit the SP sp.fit(x_tr) # Get the SP's output sp_x_tr = sp.predict(x_tr) sp_x_te = sp.predict(x_te) sp_y_te = sp.predict(y_te) # Get the metrics for the SP's results u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr) o_sp_x_tr = metrics.compute_overlap(sp_x_tr) u_sp_x_te = metrics.compute_uniqueness(sp_x_te) o_sp_x_te = metrics.compute_overlap(sp_x_te) u_sp_y_te = metrics.compute_uniqueness(sp_y_te) o_sp_y_te = metrics.compute_overlap(sp_y_te) # Log all of the metrics sp._log_stats('Input Base Class Train Uniqueness', u_x_tr) sp._log_stats('Input Base Class Train Overlap', o_x_tr) sp._log_stats('Input Base Class Test Uniqueness', u_x_te) sp._log_stats('Input Base Class Test Overlap', o_x_te) sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te) sp._log_stats('Input Novelty Class Test Overlap', o_y_te) sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr) sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr) sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te) sp._log_stats('SP Base Class Test Overlap', o_sp_x_te) sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te) sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te) # Print the results fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}' 
if verbose: print '\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te' print fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr, u_sp_x_te, u_sp_y_te) print fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr, o_sp_x_te, o_sp_y_te) # Get average representation of the base class sp_base_result = np.mean(sp_x_tr, 0) sp_base_result[sp_base_result >= 0.5] = 1 sp_base_result[sp_base_result < 1] = 0 # Averaged results for each metric type u_sp_base_to_x_te = 0. o_sp_base_to_x_te = 0. u_sp_base_to_y_te = 0. o_sp_base_to_y_te = 0. for x, y in zip(sp_x_te, sp_y_te): # Refactor xt = np.vstack((sp_base_result, x)) yt = np.vstack((sp_base_result, y)) # Compute the sums u_sp_base_to_x_te += metrics.compute_uniqueness(xt) o_sp_base_to_x_te += metrics.compute_overlap(xt) u_sp_base_to_y_te += metrics.compute_uniqueness(yt) o_sp_base_to_y_te += metrics.compute_overlap(yt) u_sp_base_to_x_te /= ntest o_sp_base_to_x_te /= ntest u_sp_base_to_y_te /= ntest o_sp_base_to_y_te /= ntest # Log the results sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te) sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te) sp._log_stats('Base Train to Novelty Test Uniqueness', u_sp_base_to_y_te) sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te) # Print the results if verbose: print '\nDescription\tx_tr->x_te\tx_tr->y_te' print 'Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(u_sp_base_to_x_te, u_sp_base_to_y_te) print 'Overlap:\t{0:2.4f}\t{1:2.4f}'.format(o_sp_base_to_x_te, o_sp_base_to_y_te) # Create an SVM clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2) # Evaluate the SVM's performance clf.fit(x_tr) svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \ 100 svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * \ 100 # Perform classification using overlap as the feature # -- The overlap must be above 50% clf_x_te = 0. clf_y_te = 0. 
for x, y in zip(sp_x_te, sp_y_te): # Refactor xt = np.vstack((sp_base_result, x)) yt = np.vstack((sp_base_result, y)) # Compute the accuracy xo = metrics.compute_overlap(xt) yo = metrics.compute_overlap(yt) if xo >= clf_th: clf_x_te += 1 if yo < clf_th: clf_y_te += 1 clf_x_te = (clf_x_te / ntest) * 100 clf_y_te = (clf_y_te / ntest) * 100 # Store the results as errors sp_x_results[i] = 100 - clf_x_te sp_y_results[i] = 100 - clf_y_te svm_x_results[i] = 100 - svm_x_te svm_y_results[i] = 100 - svm_y_te # Log the results sp._log_stats('SP % Correct Base Class', clf_x_te) sp._log_stats('SP % Correct Novelty Class', clf_y_te) sp._log_stats('SVM % Correct Base Class', svm_x_te) sp._log_stats('SVM % Correct Novelty Class', svm_y_te) # Print the results if verbose: print '\nSP Base Class Detection : {0:2.2f}%'.format(clf_x_te) print 'SP Novelty Class Detection : {0:2.2f}%'.format(clf_y_te) print 'SVM Base Class Detection : {0:2.2f}%'.format(svm_x_te) print 'SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te) # Save the results with open(os.path.join(base_dir, 'results.pkl'), 'wb') as f: cPickle.dump((sp_x_results, sp_y_results, svm_x_results, svm_y_results), f, cPickle.HIGHEST_PROTOCOL)
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """
    Train an SP on MNIST, report its accuracy and plot its reconstructions.

    For every CV split a LinearSVC trained on the raw inputs is scored as a
    baseline and the SP is scored with its column, probabilistic and
    dimensionality reduction methods. Afterwards one training sample per
    digit is passed through the SP and its reconstruction is plotted below
    the original.

    @param ntrain: The number of training samples, as passed to MNISTCV.

    @param ntest: The number of testing samples, as passed to MNISTCV.

    @param nsplits: The number of CV splits, as passed to MNISTCV.

    @param seed: The seed for the SP, the SVMs, the CV splitter and numpy.
    """

    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 20,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'max_boost': 3,
        'duty_cycle': 8,
        'nsynapses': 392,
        'seg_th': 2,
        'syn_th': 0.5,
        'pinc': 0.01,
        'pdec': 0.02,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 1,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy so that the samples drawn below for the reconstruction plot
    # are reproducible for a given seed
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for tr, te in cv:
        # Create the region
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])
        score = clf.score(x[te], y[te])
        print('SVM Only Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the column method
        score = sp.score(x[te], y[te])
        print('Column Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the probabilistic method
        score = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
        print('Probabilistic Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the dimensionality reduction method
        score = sp.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
        ndims = len(sp.reduce_dimensions(x[0]))
        print('Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
            ninputs, ndims, ninputs / float(ndims)))
        print('Reduction Accuracy: {0:.2f}%'.format(score * 100))

    # Get a random set of unique inputs from the training set, one per digit.
    # The training indexes and the SP of the last split are used.
    ndigits = 10
    last_tr_x, last_tr_y = x[tr], y[tr]
    inputs = np.zeros((ndigits, ninputs))
    for digit in range(ndigits):
        ix = np.random.permutation(np.where(last_tr_y == digit)[0])[0]
        inputs[digit] = last_tr_x[ix]

    # Get the SP's predictions for the inputs
    sp_pred = sp.predict(inputs)

    # Get the reconstruction in the context of the SP
    sp_inputs = sp.reconstruct_input(sp_pred)

    # Make a plot comparing the two
    x1_labels = [str(digit) for digit in range(ndigits)]
    x2_labels = [str(digit) for digit in range(ndigits)]
    title = 'Input Reconstruction: Original (top), SP (bottom)'
    shape = (28, 28)
    path = os.path.join(sp.log_dir, 'input_reconstruction.png')
    plot_compare_images((inputs, sp_inputs), shape, title,
                        (x1_labels, x2_labels,), path)
def sp_one_level(base_dir, data_path='data.csv', seed=123456789):
    """
    Evaluate a single SP level on a categorical CSV dataset.

    The last column of the CSV is the label and all other columns are
    features. Each feature is encoded with its own CategoryEncoder, the
    encoded data and the run details are dumped to a new time-stamped
    directory, and an SP is trained and scored on every stratified split.
    The median of the scores is printed and appended to the details file.

    @param base_dir: The directory in which the time-stamped results
    directory is created.

    @param data_path: The path to the CSV file containing the data.

    @param seed: The seed for the SP, the classifier and the splitter.
    """

    # Every run gets its own time-stamped directory
    stamp = time.strftime('%Y%m%d-%H%M%S', time.localtime())
    new_dir = os.path.join(base_dir, stamp)
    os.makedirs(new_dir)
    details_path = os.path.join(new_dir, 'details.csv')

    # Experiment parameters
    nsplits = 8
    pct_train = 0.8

    # Load the data; the label is the last column
    frame = pd.read_csv(data_path)
    x = frame.ix[:, :-1].as_matrix()
    y = frame.ix[:, -1].as_matrix()
    x, y = convert_data_to_int(x, y)

    # One category encoder per feature, merged into a single encoder
    num_bits_per_encoder = 50
    category_encoders = []
    for column in x.T:
        category_encoders.append(CategoryEncoder(
            num_categories=len(set(column)),
            num_bits=num_bits_per_encoder))
    total_bits = num_bits_per_encoder * len(category_encoders)
    encoder = MultiEncoder(*category_encoders)

    # SP configuration
    ncolumns = 4096
    nactive = int(ncolumns * 0.20)
    nsynapses = 25
    seg_th = 0
    sp_config = {
        'ninputs': total_bits,
        'ncolumns': ncolumns,
        'nactive': nactive,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': nsynapses,
        'seg_th': seg_th,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 1,
        'log_dir': os.path.join(new_dir, '1-1'),
        'clf': LinearSVC(random_state=seed)
    }

    # Encode the samples one row at a time
    new_x = np.zeros((len(x), total_bits), dtype='bool')
    for row_ix, row in enumerate(x):
        encoder.bind_data([(value, col_ix)
                           for col_ix, value in enumerate(row)])
        new_x[row_ix] = np.array(list(encoder.encode()), dtype='bool')

    # Persist the encoded data
    with open(os.path.join(new_dir, 'input.pkl'), 'wb') as f:
        cPickle.dump((new_x, y), f, cPickle.HIGHEST_PROTOCOL)

    # Persist the encoder and experiment details
    with open(details_path, 'wb') as f:
        writer = csv.writer(f)
        for cat_ix, cat in enumerate(category_encoders):
            writer.writerow([
                'Category {0}: Num bits: {1}'.format(cat_ix, cat.num_bits),
                'Category {0}: Active bits: {1}'.format(
                    cat_ix, cat.active_bits),
                'Category {0}: Num categories: {1}'.format(
                    cat_ix, cat.num_categories)])
        writer.writerow(['Num splits', nsplits])
        writer.writerow(['% train', pct_train])
        writer.writerow(['Seed', seed])

    # Train and score a fresh SP on every split, in parallel
    sss = StratifiedShuffleSplit(y, n_iter=nsplits, train_size=pct_train,
                                 random_state=seed)
    jobs = (
        delayed(train_score_clf)(
            SPRegion(**sp_config), new_x[tr], new_x[te], y[tr], y[te])
        for tr, te in sss)
    results = Parallel(n_jobs=-1)(jobs)

    # Report the individual scores and their median
    pct_accuracy = np.median(results)
    print(['{0:.3f}'.format(r) for r in results])
    print('SP + Linear SVM: {0:.3f} %'.format(pct_accuracy))
    with open(details_path, 'ab') as f:
        csv.writer(f).writerow(['% Accuracy', pct_accuracy])
def run_single_experiment(base_dir, ntrials=10, seed=123456789):
    """
    Run the experiment stored in a directory once for every generated seed.

    The SP configuration is read from 'config.json'. The dataset and its
    precomputed uniqueness, overlap and correlation are read from
    'dataset.pkl', which holds two consecutive pickles. For each seed an SP
    is built, fit on the dataset and the input and SP metrics are logged
    through the SP.

    @param base_dir: The directory containing the experiment to be run.

    @param ntrials: The number of trials to perform with different seeds.

    @param seed: The initial seed used to generate the other random seeds.
    """

    # One seed per trial
    seeds = generate_seeds(ntrials, seed)

    # Load the SP configuration
    config_path = os.path.join(base_dir, 'config.json')
    with open(config_path, 'rb') as f:
        config = json.load(f)

    # Load the dataset followed by its base metrics (second pickle)
    dataset_path = os.path.join(base_dir, 'dataset.pkl')
    with open(dataset_path, 'rb') as f:
        data = cPickle.load(f)
        input_stats = cPickle.load(f)
    uniqueness_data, overlap_data, correlation_data = input_stats
    input_labels = ('Input Uniqueness', 'Input Overlap', 'Input Correlation')
    input_values = (uniqueness_data, overlap_data, correlation_data)

    metrics = SPMetrics()

    for trial_seed in seeds:
        # Build and train an SP using this trial's seed
        config['seed'] = trial_seed
        sp = SPRegion(**config)
        sp.fit(data)
        sp_output = sp.predict(data)

        # Log the metrics of the input, then those of the SP's output
        for label, value in zip(input_labels, input_values):
            sp._log_stats(label, value)
        sp._log_stats('SP Uniqueness',
                      metrics.compute_uniqueness(sp_output))
        sp._log_stats('SP Overlap', metrics.compute_overlap(sp_output))
        sp._log_stats('SP Correlation',
                      1 - metrics.compute_distance(sp_output))
def main():
    """
    Program entry.

    Sweep the input noise of an SPDataset, train a fresh SP at every noise
    level and plot each metric of the raw data against the same metric of the
    SP's output.
    """

    # Dataset and SP parameters
    nsamples, nbits, pct_active = 500, 100, 0.4
    seed = 123456789
    base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(base_path, '1-1')
    }

    # Noise levels to evaluate (the end point, 100% noise, is excluded)
    npoints = 25
    pct_noises = np.linspace(0, 1, npoints, False)

    # One result series per metric, for the raw data and for the SP output
    names = ('uniqueness', 'similarity', 'one_similarity', 'zero_similarity',
             'dissimilarity', 'overlap', 'correlation')
    raw_stats = dict((name, np.zeros(npoints)) for name in names)
    sp_stats = dict((name, np.zeros(npoints)) for name in names)

    metrics = SPMetrics()

    def record(stats, ix, sdrs):
        """Store every metric of the SDRs at the given index."""
        stats['uniqueness'][ix] = metrics.compute_uniqueness(sdrs)
        stats['similarity'][ix] = metrics.compute_total_similarity(
            sdrs, confidence_interval=0.9)
        stats['one_similarity'][ix] = metrics.compute_one_similarity(
            sdrs, confidence_interval=0.9)
        stats['zero_similarity'][ix] = metrics.compute_zero_similarity(
            sdrs, confidence_interval=0.9)
        stats['dissimilarity'][ix] = metrics.compute_dissimilarity(
            sdrs, confidence_interval=0.9)
        stats['overlap'][ix] = metrics.compute_overlap(sdrs)
        stats['correlation'][ix] = 1 - metrics.compute_distance(sdrs)

    # Vary the input noise
    for ix, pct_noise in enumerate(pct_noises):
        print('Iteration {0} of {1}'.format(ix + 1, npoints))

        # Build the dataset and get its stats
        ds = SPDataset(nsamples=nsamples, nbits=nbits, pct_active=pct_active,
                       pct_noise=pct_noise, seed=seed)
        record(raw_stats, ix, ds.data)

        # Train a new SP and get the stats of its output SDRs
        sp = SPRegion(**kargs)
        sp.fit(ds.data)
        record(sp_stats, ix, sp.predict(ds.data))

    # (message, metric name, y-axis label) for each plot, in display order;
    # the image is saved as '<metric name>.png'
    plots = (
        ('Showing uniqueness - 0% is ideal',
         'uniqueness', 'Uniqueness [%]'),
        ('Showing total similarity - 100% is ideal',
         'similarity', 'Total similarity [%]'),
        ('Showing similarity of "1" bits - 100% is ideal',
         'one_similarity', "Similarity of '1's [%]"),
        ('Showing similarity of "0" bits - 100% is ideal',
         'zero_similarity', "Similarity of '0's [%]"),
        ('Showing dissimilarity - 0% is ideal',
         'dissimilarity', "Dissimilarity [%]"),
        ('Showing average normalized overlap - 100% is ideal',
         'overlap', "% Normalized Overlap"),
        ('Showing % average sample correlation coefficient - 100% is ideal',
         'correlation', "% Correlation"),
    )
    for message, name, y_label in plots:
        print(message)
        plot_line([pct_noises * 100, pct_noises * 100],
                  [raw_stats[name] * 100, sp_stats[name] * 100],
                  series_names=('Raw Data', 'SP Output'),
                  x_label='% Noise', y_label=y_label, xlim=False,
                  ylim=(-5, 105),
                  out_path=os.path.join(base_path, name + '.png'),
                  show=True)

    print('*** All data saved in "{0}" ***'.format(base_path))
def local_experiment():
    """
    Run a single experiment, locally.

    An SP is trained on a noisy dataset and the uniqueness, overlap and
    correlation of the input and of the SP's output are logged and printed.
    A second dataset, built with a different seed, is then passed through the
    trained SP and each of its outputs is compared against the averaged
    output of the first dataset. The mean of each metric is printed.
    """

    seed = 123456789
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 100,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': os.path.join(os.path.expanduser('~'), 'scratch',
                                'param_experiments', '1-1')
    }

    # Get the data
    nsamples, nbits, pct_active, pct_noise = 500, 100, 0.4, 0.15
    ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed)
    data = ds.data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Create the SP
    sp = SPRegion(**config)

    # Fit the SP
    sp.fit(data)

    # Get the SP's output
    sp_output = sp.predict(data)

    # Get the metrics for the SP's results
    sp_uniqueness = metrics.compute_uniqueness(sp_output)
    sp_overlap = metrics.compute_overlap(sp_output)
    sp_correlation = 1 - metrics.compute_distance(sp_output)

    # Log all of the metrics
    sp._log_stats('Input Uniqueness', uniqueness_data)
    sp._log_stats('Input Overlap', overlap_data)
    sp._log_stats('Input Correlation', correlation_data)
    sp._log_stats('SP Uniqueness', sp_uniqueness)
    sp._log_stats('SP Overlap', sp_overlap)
    sp._log_stats('SP Correlation', sp_correlation)

    print('Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(uniqueness_data,
                                                   sp_uniqueness))
    print('Overlap:\t{0:2.4f}\t{1:2.4f}'.format(overlap_data, sp_overlap))
    print('Correlation:\t{0:2.4f}\t{1:2.4f}'.format(correlation_data,
                                                    sp_correlation))

    # Get a new random input
    ds2 = SPDataset(nsamples, nbits, pct_active, pct_noise, 123)
    print('\n% Overlapping old class to new: \t{0:2.4f}%'.format(
        (float(np.dot(ds.input, ds2.input)) / nbits) * 100))

    # Test the SP on the new dataset
    sp_output2 = sp.predict(ds2.data)

    # Get average representation of first result: a bit is set if it was
    # active in at least half of the outputs
    original_result = np.mean(sp_output, 0)
    original_result[original_result >= 0.5] = 1
    original_result[original_result < 1] = 0

    # Get averaged results for each metric type. The metrics must be summed
    # across the samples before dividing by the number of samples.
    sp_uniqueness2 = 0.
    sp_overlap2 = 0.
    sp_correlation2 = 0.
    for item in sp_output2:
        test = np.vstack((original_result, item))
        sp_uniqueness2 += metrics.compute_uniqueness(test)
        sp_overlap2 += metrics.compute_overlap(test)
        sp_correlation2 += 1 - metrics.compute_distance(test)
    sp_uniqueness2 /= len(sp_output2)
    sp_overlap2 /= len(sp_output2)
    sp_correlation2 /= len(sp_output2)
    print('{0} {1} {2}'.format(sp_uniqueness2, sp_overlap2, sp_correlation2))
def local_experiment():
    """Run a single experiment, locally.

    An SP is trained on a noisy dataset and the uniqueness, overlap and
    correlation of the input and of the SP's output are logged and printed.
    A second dataset, built with a different seed, is then passed through the
    trained SP and each of its outputs is compared against the averaged
    output of the first dataset. The mean of each metric is printed.
    """
    seed = 123456789
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 100,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': os.path.join(os.path.expanduser('~'), 'scratch',
                                'param_experiments', '1-1')
    }

    # Get the data
    nsamples, nbits, pct_active, pct_noise = 500, 100, 0.4, 0.15
    ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed)
    data = ds.data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Create the SP
    sp = SPRegion(**config)

    # Fit the SP
    sp.fit(data)

    # Get the SP's output
    sp_output = sp.predict(data)

    # Get the metrics for the SP's results
    sp_uniqueness = metrics.compute_uniqueness(sp_output)
    sp_overlap = metrics.compute_overlap(sp_output)
    sp_correlation = 1 - metrics.compute_distance(sp_output)

    # Log all of the metrics
    sp._log_stats('Input Uniqueness', uniqueness_data)
    sp._log_stats('Input Overlap', overlap_data)
    sp._log_stats('Input Correlation', correlation_data)
    sp._log_stats('SP Uniqueness', sp_uniqueness)
    sp._log_stats('SP Overlap', sp_overlap)
    sp._log_stats('SP Correlation', sp_correlation)

    print(f'Uniqueness:\t{uniqueness_data:2.4f}\t{sp_uniqueness:2.4f}')
    print(f'Overlap:\t{overlap_data:2.4f}\t{sp_overlap:2.4f}')
    print(f'Correlation:\t{correlation_data:2.4f}\t{sp_correlation:2.4f}')

    # Get a new random input
    ds2 = SPDataset(nsamples, nbits, pct_active, pct_noise, 123)
    # The percentage is computed first so that the message fits on one line;
    # continuing the string with a backslash would embed the indentation of
    # the following line in the printed text.
    class_overlap = (float(np.dot(ds.input, ds2.input)) / nbits) * 100
    print(f'\n% Overlapping old class to new: \t{class_overlap:2.4f}%')

    # Test the SP on the new dataset
    sp_output2 = sp.predict(ds2.data)

    # Get average representation of first result: a bit is set if it was
    # active in at least half of the outputs
    original_result = np.mean(sp_output, 0)
    original_result[original_result >= 0.5] = 1
    original_result[original_result < 1] = 0

    # Get averaged results for each metric type. The metrics must be summed
    # across the samples before dividing by the number of samples.
    sp_uniqueness2 = 0.
    sp_overlap2 = 0.
    sp_correlation2 = 0.
    for item in sp_output2:
        test = np.vstack((original_result, item))
        sp_uniqueness2 += metrics.compute_uniqueness(test)
        sp_overlap2 += metrics.compute_overlap(test)
        sp_correlation2 += 1 - metrics.compute_distance(test)
    sp_uniqueness2 /= len(sp_output2)
    sp_overlap2 /= len(sp_output2)
    sp_correlation2 /= len(sp_output2)
    print(sp_uniqueness2, sp_overlap2, sp_correlation2)
def base_experiment(config, ntrials=1, seed=123456789):
    """
    Run a single MNIST novelty detection experiment, locally.

    Digit 0 is the base class and digits 1-9 are the novelty classes. For
    every trial an SP is trained on the base class and a sample is accepted
    as base class when the overlap between its SDR and the averaged training
    SDR reaches the threshold. A linear one-class SVM trained on the raw
    inputs is evaluated as the reference.

    @param config: The configuration parameters to use for the SP. Its
    'seed' entry is overwritten on every trial.

    @param ntrials: The number of times to repeat the experiment.

    @param seed: The random seed to use.

    @return: A tuple containing the percentage errors for the SP's base
    class and novelty class results and the SVM's base class and novelty
    class results, respectively. The base class results are arrays with one
    entry per trial. The novelty class results are lists with one item per
    trial, each being an array with one entry per novelty digit (1-9).
    """

    # Base parameters
    ntrain, ntest = 800, 200
    clf_th = 0.5
    nnovel = 9  # The novelty classes are the digits 1-9

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    tr_x_0 = np.random.permutation(tr_x[tr_y == 0])
    x_tr = tr_x_0[:ntrain]
    x_te = tr_x_0[ntrain:ntrain + ntest]
    outliers = [np.random.permutation(tr_x[tr_y == i])[:ntest]
                for i in range(1, nnovel + 1)]

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    c_x_tr = 1 - metrics.compute_distance(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    c_x_te = 1 - metrics.compute_distance(x_te)
    u_y_te, o_y_te, c_y_te = [], [], []
    for outlier in outliers:
        u_y_te.append(metrics.compute_uniqueness(outlier))
        o_y_te.append(metrics.compute_overlap(outlier))
        c_y_te.append(1 - metrics.compute_distance(outlier))

    # Initialize the overall results. The novelty results are indexed by
    # trial first, as each trial stores one array holding all novelty
    # classes. Indexing them by class first would leave entries of mixed
    # shape and fail as soon as there are more trials than classes.
    sp_x_results = np.zeros(ntrials)
    sp_y_results = [np.zeros(nnovel) for _ in range(ntrials)]
    svm_x_results = np.zeros(ntrials)
    svm_y_results = [np.zeros(nnovel) for _ in range(ntrials)]

    # Iterate across the trials:
    for nt in range(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = [sp.predict(outlier) for outlier in outliers]

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
        u_sp_y_te, o_sp_y_te, c_sp_y_te = [], [], []
        for y in sp_y_te:
            u_sp_y_te.append(metrics.compute_uniqueness(y))
            o_sp_y_te.append(metrics.compute_overlap(y))
            c_sp_y_te.append(1 - metrics.compute_distance(y))

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Train Correlation', c_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Base Class Test Correlation', c_x_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
        novelty_stats = zip(u_y_te, o_y_te, c_y_te, u_sp_y_te, o_sp_y_te,
                            c_sp_y_te)
        for i, (a, b, c, d, e, f) in enumerate(novelty_stats, 1):
            sp._log_stats('Input Novelty Class {0} Uniqueness'.format(i), a)
            sp._log_stats('Input Novelty Class {0} Overlap'.format(i), b)
            sp._log_stats('Input Novelty Class {0} Correlation'.format(i), c)
            sp._log_stats('SP Novelty Class {0} Uniqueness'.format(i), d)
            sp._log_stats('SP Novelty Class {0} Overlap'.format(i), e)
            sp._log_stats('SP Novelty Class {0} Correlation'.format(i), f)

        # Get average representation of the base class: a bit is set if it
        # was active in at least half of the training SDRs
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp, o_sp, c_sp = np.zeros(nnovel), np.zeros(nnovel), np.zeros(nnovel)
        for i, x in enumerate(sp_x_te):
            xt = np.vstack((sp_base_result, x))
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)

            # The i-th sample of every novelty class is paired with the
            # base class representation
            for j, yi in enumerate(sp_y_te):
                yt = np.vstack((sp_base_result, yi[i]))
                u_sp[j] += metrics.compute_uniqueness(yt)
                o_sp[j] += metrics.compute_overlap(yt)
                c_sp[j] += 1 - metrics.compute_distance(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp /= ntest
        o_sp /= ntest
        c_sp /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness',
                      u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Correlation',
                      c_sp_base_to_x_te)
        for i, j in enumerate(range(1, nnovel + 1)):
            sp._log_stats('Base Train to Novelty {0} Uniqueness'.format(j),
                          u_sp[i])
            sp._log_stats('Base Train to Novelty {0} Overlap'.format(j),
                          o_sp[i])
            sp._log_stats('Base Train to Novelty {0} Correlation'.format(j),
                          c_sp[i])

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance: +1 is the base class and -1 is a
        # novelty
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \
            100
        svm_y_te = np.array([
            len(np.where(clf.predict(outlier) == -1)[0]) / float(ntest) * 100
            for outlier in outliers])

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = np.zeros(nnovel)
        for i, x in enumerate(sp_x_te):
            xt = np.vstack((sp_base_result, x))
            xo = metrics.compute_overlap(xt)
            if xo >= clf_th:
                clf_x_te += 1

            for j, yi in enumerate(sp_y_te):
                yt = np.vstack((sp_base_result, yi[i]))
                yo = metrics.compute_overlap(yt)
                if yo < clf_th:
                    clf_y_te[j] += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[nt] = 100 - clf_x_te
        sp_y_results[nt] = 100 - clf_y_te
        svm_x_results[nt] = 100 - svm_x_te
        svm_y_results[nt] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        for i, j in enumerate(range(1, nnovel + 1)):
            sp._log_stats('SP % Correct Novelty Class {0}'.format(j),
                          clf_y_te[i])
            sp._log_stats('SVM % Correct Novelty Class {0}'.format(j),
                          svm_y_te[i])
        sp._log_stats('SP % Mean Correct Novelty Class', np.mean(clf_y_te))
        sp._log_stats('SVM % Mean Correct Novelty Class', np.mean(svm_y_te))
        sp._log_stats('SP % Adjusted Score',
                      (np.mean(clf_y_te) * clf_x_te) / 100)
        sp._log_stats('SVM % Adjusted Score',
                      (np.mean(svm_y_te) * svm_x_te) / 100)

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
def main(ds, p, ncols=2048, duty_cycle=100, nepochs=10,
         global_inhibition=True, seed=123456789):
    """Run an experiment.

    The SP's third phase is replaced by this module's _phase3 before the
    region is built. The region is then trained one sample at a time and the
    parameters, input density and training time are written to
    'details.json' in the output directory.

    @param ds: The dataset.

    @param p: The full path to the directory to save the results.

    @param ncols: The number of columns.

    @param duty_cycle: The duty cycle.

    @param nepochs: The number of epochs

    @param global_inhibition: If True use global inhibition otherwise use
    local inhibition.

    @param seed: The random seed.

    @raise OSError: If the output directory does not exist and cannot be
    created.
    """

    # Get some parameters; the density is taken from the first sample
    ninputs = ds.shape[1]
    density = np.sum(ds[0]) / float(ninputs)

    # Make the directory if it doesn't exist. Any failure other than the
    # directory already being there is a real error and is propagated.
    try:
        os.makedirs(p)
    except OSError:
        if not os.path.isdir(p):
            raise

    # Initializations
    np.random.seed(seed)
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ncols,
        'nsynapses': 40,
        'random_permanence': True,
        'pinc': 0.03,
        'pdec': 0.05,
        'seg_th': 15,
        'nactive': int(0.02 * ncols),
        'duty_cycle': duty_cycle,
        'max_boost': 10,
        'global_inhibition': global_inhibition,
        'trim': 1e-4
    }

    # Create the region, using the replacement for the third phase
    delattr(SPRegion, '_phase3')
    setattr(SPRegion, '_phase3', _phase3)
    sp = SPRegion(**kargs)
    sp.iter, sp.out_path = 1, p

    # Train the region, advancing the iteration counter after each sample
    t = time.time()
    for _ in range(nepochs):
        for x in ds:
            sp.execute(x)
            sp.iter += 1
    t = time.time() - t

    # Dump the details
    kargs['density'] = density
    kargs['seed'] = seed
    kargs['nepochs'] = nepochs
    kargs['time'] = t
    with open(os.path.join(p, 'details.json'), 'w') as f:
        f.write(json.dumps(kargs, sort_keys=True, indent=4,
                           separators=(',', ': ')))
def base_experiment(config, pct_noise=0.15, noverlap_bits=0, ntrials=10,
                    verbose=False, seed=123456789):
    """
    Run a single synthetic novelty detection experiment, locally.

    A base class dataset and a novelty class dataset are generated. For each
    trial an SP is trained on the base class and a sample is accepted as
    base class when the overlap between its SDR and the averaged training
    SDR reaches the threshold. A linear one-class SVM trained on the raw
    inputs is evaluated as the reference. The percentage errors of all
    trials are pickled to 'results.pkl' in the SP's log directory.

    @param config: The configuration parameters. 'log_dir' must be present
    and 'seed' is overwritten on every trial.

    @param pct_noise: The percentage of noise to add to the dataset.

    @param noverlap_bits: The number of bits the base class should overlap
    with the novelty class.

    @param ntrials: The number of times to repeat the experiment.

    @param verbose: If True print the results.

    @param seed: The random seed to use.
    """

    # Dataset sizes and the overlap required to be accepted as base class
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5

    # The results are stored alongside the SP's logs
    base_dir = config['log_dir']
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    np.random.seed(seed)

    # Base class: the first samples train, the remaining ones test
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Novelty class: bits outside of the base class plus the requested
    # number of bits shared with it
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [bit for bit in range(nbits) if bit not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    novel_bits = np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)
    outlier_base[novel_bits] = 1
    shared_bits = np.random.permutation(list(base_indexes))[:noverlap_bits]
    outlier_base[shared_bits] = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        base_mask = x_ds.base_class.astype('i')
        outlier_mask = outlier_base.astype('i')
        print("\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * base_mask).sum() / 40.))
        print("Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_mask).sum() / 40.))
        print('Overlap between two classes: {0}'.format(
            np.dot(base_mask, outlier_mask)))

    metrics = SPMetrics()

    # Metrics of the raw datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)

    # Percentage errors, one entry per trial
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Row format of the verbose metrics table
    fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'

    for trial, seed2 in enumerate(generate_seeds(ntrials, seed)):
        # Build and train the SP for this trial
        config['seed'] = seed2
        sp = SPRegion(**config)
        sp.fit(x_tr)

        # SDRs of every dataset
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Metrics of the SDRs
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)

        # Log the metrics of the inputs followed by those of the SDRs
        dataset_stats = (
            ('Input Base Class Train Uniqueness', u_x_tr),
            ('Input Base Class Train Overlap', o_x_tr),
            ('Input Base Class Test Uniqueness', u_x_te),
            ('Input Base Class Test Overlap', o_x_te),
            ('Input Novelty Class Test Uniqueness', u_y_te),
            ('Input Novelty Class Test Overlap', o_y_te),
            ('SP Base Class Train Uniqueness', u_sp_x_tr),
            ('SP Base Class Train Overlap', o_sp_x_tr),
            ('SP Base Class Test Uniqueness', u_sp_x_te),
            ('SP Base Class Test Overlap', o_sp_x_te),
            ('SP Novelty Class Test Uniqueness', u_sp_y_te),
            ('SP Novelty Class Test Overlap', o_sp_y_te),
        )
        for label, value in dataset_stats:
            sp._log_stats(label, value)

        if verbose:
            print('\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te')
            print(fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te,
                               u_sp_x_tr, u_sp_x_te, u_sp_y_te))
            print(fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te,
                               o_sp_x_tr, o_sp_x_te, o_sp_y_te))

        # Representative SDR of the base class: a bit is set if it was
        # active in at least half of the training SDRs
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        def with_base(sdr):
            """Stack the base class representation on top of the SDR."""
            return np.vstack((sp_base_result, sdr))

        # Mean metrics between the representative SDR and each test SDR
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        for x_sdr, y_sdr in zip(sp_x_te, sp_y_te):
            xt = with_base(x_sdr)
            yt = with_base(y_sdr)
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            o_sp_base_to_y_te += metrics.compute_overlap(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest

        sp._log_stats('Base Train to Base Test Uniqueness',
                      u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap',
                      o_sp_base_to_y_te)

        if verbose:
            print('\nDescription\tx_tr->x_te\tx_tr->y_te')
            print('Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te))
            print('Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te))

        # Reference classifier: +1 is the base class and -1 is a novelty
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)
        clf.fit(x_tr)
        svm_base_hits = len(np.where(clf.predict(x_te) == 1)[0])
        svm_x_te = svm_base_hits / float(ntest) * 100
        svm_novel_hits = len(np.where(clf.predict(y_te) == -1)[0])
        svm_y_te = svm_novel_hits / float(ntest) * 100

        # SP classifier: the overlap with the representative SDR must reach
        # the threshold to be the base class
        clf_x_te = 0.
        clf_y_te = 0.
        for x_sdr, y_sdr in zip(sp_x_te, sp_y_te):
            xo = metrics.compute_overlap(with_base(x_sdr))
            yo = metrics.compute_overlap(with_base(y_sdr))
            if xo >= clf_th:
                clf_x_te += 1
            if yo < clf_th:
                clf_y_te += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # The overall results are errors rather than accuracies
        sp_x_results[trial] = 100 - clf_x_te
        sp_y_results[trial] = 100 - clf_y_te
        svm_x_results[trial] = 100 - svm_x_te
        svm_y_results[trial] = 100 - svm_y_te

        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        if verbose:
            print('\nSP Base Class Detection : {0:2.2f}%'.format(clf_x_te))
            print('SP Novelty Class Detection : {0:2.2f}%'.format(clf_y_te))
            print('SVM Base Class Detection : {0:2.2f}%'.format(svm_x_te))
            print('SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te))

    # Save the errors of all of the trials
    results = (sp_x_results, sp_y_results, svm_x_results, svm_y_results)
    with open(os.path.join(base_dir, 'results.pkl'), 'wb') as f:
        cPickle.dump(results, f, cPickle.HIGHEST_PROTOCOL)
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """
    Run a simple MNIST classification task.

    For each cross-validation split an SP region is trained and scored with
    its default, 'prob', and 'reduction' scoring methods, next to a linear
    SVM fitted directly on the raw inputs. One training sample per digit is
    then passed through the SP, reconstructed, and plotted.

    @param ntrain: Forwarded to MNISTCV (presumably the number of training
    samples per split).

    @param ntest: Forwarded to MNISTCV (presumably the number of testing
    samples per split).

    @param nsplits: Forwarded to MNISTCV (presumably the number of splits).

    @param seed: The seed used for numpy, the SP, the SVMs, and MNISTCV.
    """

    # SP configuration; MNIST images are 28 x 28 = 784 pixels
    ninputs = 784
    sp_config = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 30,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'disable_boost': True,
        'nsynapses': 392,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,
        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    np.random.seed(seed)

    # Merge the stock train / test sets; the CV object supplies the indexes
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # NOTE(review): the reconstruction step below is kept inside the fold
    # loop because it needs the fold's `sp` and `tr` - confirm against the
    # original layout.
    for tr, te in cv:
        train_x, train_y = x[tr], y[tr]
        test_x, test_y = x[te], y[te]

        # Build and train the region
        sp = SPRegion(**sp_config)
        sp.fit(train_x, train_y)

        # Baseline: the SVM on its own
        svm = LinearSVC(random_state=seed)
        svm.fit(train_x, train_y)
        accuracy = svm.score(test_x, test_y)
        print('SVM Only Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Column method
        accuracy = sp.score(test_x, test_y)
        print('Column Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Probabilistic method
        accuracy = sp.score(test_x, test_y, tr_x=train_x,
                            score_method='prob')
        print('Probabilistic Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Dimensionality reduction method
        accuracy = sp.score(test_x, test_y, tr_x=train_x,
                            score_method='reduction')
        ndims = len(sp.reduce_dimensions(x[0]))
        print('Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
            ninputs, ndims, ninputs / float(ndims)))
        print('Reduction Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Pick one random training sample for each of the ten digits
        inputs = np.zeros((10, ninputs))
        for digit in range(10):
            candidates = np.where(train_y == digit)[0]
            ix = np.random.permutation(candidates)[0]
            inputs[digit] = train_x[ix]

        # SP output for the samples and their reconstruction
        sp_pred = sp.predict(inputs)
        sp_inputs = sp.reconstruct_input(sp_pred)

        # Plot the originals, the SDRs, and the reconstructions together
        title = 'Input Reconstruction: Original (top), SP SDRs (middle), ' \
            'SP Reconstruction (bottom)'
        shape = (28, 28)
        path = os.path.join(sp.log_dir, 'input_reconstruction.png')
        plot_compare_images((inputs, sp_pred, sp_inputs), shape, title,
                            out_path=path)
def base_experiment(pct_noise=0.15, noverlap_bits=0, exp_name='1-1',
                    ntrials=10, verbose=True, seed=123456789):
    """
    Run a single experiment, locally.

    A base class dataset and an outlier (novelty) dataset are generated. For
    each trial an SP is fitted on the base class training data and a test
    sample is classified as base class when its overlap with the averaged
    SP representation of the training data reaches the threshold. A linear
    one-class SVM fitted on the raw training data serves as the comparison.

    @param pct_noise: The percentage of noise to add to the dataset.

    @param noverlap_bits: The number of bits the base class should overlap
    with the novelty class.

    @param exp_name: The name of the experiment.

    @param ntrials: The number of times to repeat the experiment.

    @param verbose: If True print the results.

    @param seed: The random seed to use.

    @return: A tuple of four arrays with one entry per trial, containing the
    percentage errors for the SP's base class and novelty class detection
    and the SVM's base class and novelty class detection, respectively.
    """

    # Base parameters
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5
    log_dir = os.path.join(os.path.expanduser('~'), 'scratch',
                           'novelty_experiments', exp_name)

    # Configure the SP
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,

        'global_inhibition': True,

        'ncolumns': 200,
        'nactive': 50,

        'nsynapses': 75,
        'seg_th': 15,

        'syn_th': 0.5,

        'pinc': 0.001,
        'pdec': 0.001,

        'nepochs': 10,

        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Create the base dataset
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Create the outlier dataset: its base pattern takes noverlap_bits bits
    # from the base class and the remaining active bits from outside of it
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [ix for ix in range(nbits) if ix not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)] = 1
    outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] \
        = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        # 40 is nbits * pct_active for the hard-coded values above
        print("\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.))
        print("Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.))
        print('Overlap between two classes: {0}'.format(np.dot(
            x_ds.base_class.astype('i'), outlier_base.astype('i'))))

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    c_x_tr = 1 - metrics.compute_distance(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    c_x_te = 1 - metrics.compute_distance(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)
    c_y_te = 1 - metrics.compute_distance(y_te)

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Row format for the metric table: a label followed by six values. The
    # last field must be {6}; repeating {5} would print sp_x_te twice and
    # never show sp_y_te.
    fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}' \
        '\t{6:2.4f}'

    # Iterate across the trials:
    for i in range(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2
        config['log_dir'] = '{0}-{1}'.format(log_dir, i + 1)

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)
        c_sp_y_te = 1 - metrics.compute_distance(sp_y_te)

        # Log all of the metrics
        for label, value in (
            ('Input Base Class Train Uniqueness', u_x_tr),
            ('Input Base Class Train Overlap', o_x_tr),
            ('Input Base Class Train Correlation', c_x_tr),
            ('Input Base Class Test Uniqueness', u_x_te),
            ('Input Base Class Test Overlap', o_x_te),
            ('Input Base Class Test Correlation', c_x_te),
            ('Input Novelty Class Test Uniqueness', u_y_te),
            ('Input Novelty Class Test Overlap', o_y_te),
            ('Input Novelty Class Test Correlation', c_y_te),
            ('SP Base Class Train Uniqueness', u_sp_x_tr),
            ('SP Base Class Train Overlap', o_sp_x_tr),
            ('SP Base Class Train Correlation', c_sp_x_tr),
            ('SP Base Class Test Uniqueness', u_sp_x_te),
            ('SP Base Class Test Overlap', o_sp_x_te),
            ('SP Base Class Test Correlation', c_sp_x_te),
            ('SP Novelty Class Test Uniqueness', u_sp_y_te),
            ('SP Novelty Class Test Overlap', o_sp_y_te),
            ('SP Novelty Class Test Correlation', c_sp_y_te),
        ):
            sp._log_stats(label, value)

        # Print the results
        if verbose:
            print('\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te')
            print(fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te,
                               u_sp_x_tr, u_sp_x_te, u_sp_y_te))
            print(fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te,
                               o_sp_x_tr, o_sp_x_te, o_sp_y_te))
            print(fmt_s.format('Correlation', c_x_tr, c_x_te, c_y_te,
                               c_sp_x_tr, c_sp_x_te, c_sp_y_te))

        # Get average representation of the base class: a bit is set when
        # it is active in at least half of the training SDRs
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        c_sp_base_to_y_te = 0.

        # Classification using overlap as the feature
        #   -- The overlap must reach clf_th to count as base class
        clf_x_te = 0.
        clf_y_te = 0.

        # Single pass: the overlap of each test SDR with the averaged base
        # representation feeds both the averaged metrics and the classifier
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the sums
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            xo = metrics.compute_overlap(xt)
            o_sp_base_to_x_te += xo
            c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            yo = metrics.compute_overlap(yt)
            o_sp_base_to_y_te += yo
            c_sp_base_to_y_te += 1 - metrics.compute_distance(yt)

            # Compute the accuracy
            if xo >= clf_th:
                clf_x_te += 1
            if yo < clf_th:
                clf_y_te += 1

        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest
        c_sp_base_to_y_te /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness',
                      u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Correlation',
                      c_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap',
                      o_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Correlation',
                      c_sp_base_to_y_te)

        # Print the results
        if verbose:
            print('\nDescription\tx_tr->x_te\tx_tr->y_te')
            print('Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te))
            print('Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te))
            print('Correlation:\t{0:2.4f}\t{1:2.4f}'.format(
                c_sp_base_to_x_te, c_sp_base_to_y_te))

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance: +1 predictions are counted as
        # correct for the base class, -1 predictions for the novelty class
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) \
            * 100
        svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) \
            * 100

        # Convert the SP's counts to percentages
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[i] = 100 - clf_x_te
        sp_y_results[i] = 100 - clf_y_te
        svm_x_results[i] = 100 - svm_x_te
        svm_y_results[i] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        # Print the results
        if verbose:
            print('\nSP Base Class Detection     : {0:2.2f}%'.format(
                clf_x_te))
            print('SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te))
            print('SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te))
            print('SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te))

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
def main(ncols, npsyns, ninputs, density, seg_th, syn_th, ntrials=100,
         seed=123456789):
    """
    Compare the theoretical to the experimental results.

    The theoretical values are printed first. The experimental values are
    then obtained by building ntrials SP regions against one fixed random
    input and averaging the measured statistics.

    @param ncols: The number of columns.

    @param npsyns: The number of proximal synapses.

    @param ninputs: The number of inputs.

    @param density: The percentage of active bits in the input.

    @param seg_th: The threshold for a segment to become active.

    @param syn_th: The threshold at which synapses are connected.

    @param ntrials: The number of trials to perform for the experimental
    results.

    @param seed: Seed for numpy's random number generator.

    @raise ValueError: If density asks for more active bits than there are
    inputs.
    """

    print('**** THEORETICAL ****')
    print('Probability that an input will be selected: {0:2.2f}%'.format(
        p_a1(npsyns, ninputs) * 100))
    p = p_c(ncols, npsyns, ninputs)
    print('Probability of all inputs being selected: {0:2.2f}%'.format(
        (1 - p) * 100))
    print('Expected inputs not seen: {0}'.format(int(p * ninputs)))
    print('Expected number of columns connected to an input: {0}'.format(
        int(e_b(ncols, npsyns, ninputs))))
    print('Expected number of active synapses on a column: {0:2.2f}'.format(
        e_c(npsyns, density)))
    print('Expected number of active connected synapses on a column: '
          '{0:2.2f}'.format(e_d(npsyns, density, syn_th)))
    print('Expected number of columns with active inputs >= seg_th: {0:2.2f}'
          .format(e_e(npsyns, density, ncols, seg_th)))
    print('Expected number of columns with active connected inputs >= '
          'seg_th: {0:2.2f}'.format(e_f(npsyns, density, ncols, seg_th,
                                        syn_th)))

    # Prep the experimental
    print('\n**** Experimental ****')
    np.random.seed(seed)
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ncols,
        'nsynapses': npsyns,
        'syn_th': syn_th,
        'seg_th': seg_th
    }

    # Build an input with exactly nactive distinct active bits
    x = np.zeros(ninputs, dtype='bool')
    nactive = int(ninputs * density)
    if nactive > ninputs:
        # The top-up loop below could never finish
        raise ValueError('density selects {0} active bits but only {1} '
                         'inputs exist'.format(nactive, ninputs))
    indexes = set(np.random.randint(0, ninputs, nactive))
    while len(indexes) != nactive:
        # randint's upper bound is exclusive, so ninputs (not ninputs - 1)
        # keeps the last input eligible
        indexes.add(np.random.randint(0, ninputs))
    x[list(indexes)] = True

    # Simulate
    y0 = y1 = y2 = y3 = y4 = y5 = 0.
    for _ in range(ntrials):
        sp = SPRegion(**kargs)

        # State of the input bit behind every synapse of every column
        active = x[sp.syn_map]
        a = active.sum(1)
        b = (active * (sp.p >= syn_th)).sum(1)

        y0 += ninputs - len(set(sp.syn_map.ravel()))
        y1 += np.mean(np.array([np.sum(sp.syn_map == i)
                                for i in range(ninputs)]))
        y2 += a.mean()
        y3 += b.mean()
        y4 += (a >= seg_th).sum()
        y5 += (b >= seg_th).sum()

    print('Average number of missing inputs: {0:.2f}'.format(y0 / ntrials))
    print('Average number of columns connected to an input: {0:.2f}'.format(
        y1 / ntrials))
    print('Average number of active inputs per column: {0:.2f}'.format(
        y2 / ntrials))
    print('Average number of active connected inputs per column: {0:.2f}'
          .format(y3 / ntrials))
    print('Number of columns with active inputs >= seg_th {0:.2f}'.format(
        y4 / ntrials))
    print('Number of columns with active connected inputs >= seg_th {0:.2f}'
          .format(y5 / ntrials))
def main():
    """
    Program entry.

    Build an SP using SPDataset and see how it performs. The input noise is
    varied; for every noise level the metrics of the raw dataset and of the
    SP's output are computed, and each metric is then plotted against the
    noise level.
    """

    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    ncolumns = 300
    base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
    seed = 123456789
    kargs = {
        'ninputs': nbits,
        'ncolumns': ncolumns,
        # Integer count of active columns, derived from the column count
        'nactive': int(0.02 * ncolumns),
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,

        'nsynapses': 20,
        'seg_th': 2,

        'syn_th': 0.5,
        'pinc': 0.01,
        'pdec': 0.01,
        'pwindow': 0.5,
        'random_permanence': True,

        'nepochs': 1,
        'log_dir': os.path.join(base_path, '1-1')
    }

    # Build items to store results
    npoints = 25
    pct_noises = np.linspace(0, pct_active / 2, npoints, False)
    uniqueness_sp, uniqueness_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp, similarity_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp1, similarity_data1 = np.zeros(npoints), np.zeros(npoints)
    similarity_sp0, similarity_data0 = np.zeros(npoints), np.zeros(npoints)
    dissimilarity_sp, dissimilarity_data = np.zeros(npoints), \
        np.zeros(npoints)
    overlap_sp, overlap_data = np.zeros(npoints), np.zeros(npoints)
    correlation_sp, correlation_data = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        print('Iteration {0} of {1}'.format(i + 1, npoints))

        # Build the dataset
        ds = SPDataset(nsamples=nsamples, nbits=nbits, pct_active=pct_active,
                       pct_noise=pct_noise, seed=seed)

        # Get the dataset stats
        uniqueness_data[i] = metrics.compute_uniqueness(ds.data)
        similarity_data[i] = metrics.compute_total_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data1[i] = metrics.compute_one_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data0[i] = metrics.compute_zero_similarity(
            ds.data, confidence_interval=0.9)
        dissimilarity_data[i] = metrics.compute_dissimilarity(
            ds.data, confidence_interval=0.9)
        overlap_data[i] = metrics.compute_overlap(ds.data)
        correlation_data[i] = 1 - metrics.compute_distance(ds.data)

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(ds.data)

        # Get the SP's output SDRs
        sp_output = sp.predict(ds.data)

        # Get the stats
        uniqueness_sp[i] = metrics.compute_uniqueness(sp_output)
        similarity_sp[i] = metrics.compute_total_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp1[i] = metrics.compute_one_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp0[i] = metrics.compute_zero_similarity(
            sp_output, confidence_interval=0.9)
        dissimilarity_sp[i] = metrics.compute_dissimilarity(
            sp_output, confidence_interval=0.9)
        overlap_sp[i] = metrics.compute_overlap(sp_output)
        correlation_sp[i] = 1 - metrics.compute_distance(sp_output)

    # Make some plots. Each entry holds: the message to print, the raw data
    # series, the SP series, the y-axis label, and the output file name.
    plots = (
        ('Showing uniqueness - 0% is ideal',
         uniqueness_data, uniqueness_sp,
         'Uniqueness [%]', 'uniqueness.png'),
        ('Showing total similarity - 100% is ideal',
         similarity_data, similarity_sp,
         'Total similarity [%]', 'similarity.png'),
        ('Showing similarity of "1" bits - 100% is ideal',
         similarity_data1, similarity_sp1,
         "Similarity of '1's [%]", 'one_similarity.png'),
        ('Showing similarity of "0" bits - 100% is ideal',
         similarity_data0, similarity_sp0,
         "Similarity of '0's [%]", 'zero_similarity.png'),
        ('Showing dissimilarity - 0% is ideal',
         dissimilarity_data, dissimilarity_sp,
         "Dissimilarity [%]", 'dissimilarity.png'),
        ('Showing average normalized overlap - 100% is ideal',
         overlap_data, overlap_sp,
         "% Normalized Overlap", 'overlap.png'),
        ('Showing % average sample correlation cofficient - 100% is ideal',
         correlation_data, correlation_sp,
         "% Correlation", 'correlation.png'),
    )
    for message, data_series, sp_series, y_label, file_name in plots:
        print(message)
        plot_line([pct_noises * 100, pct_noises * 100],
                  [data_series * 100, sp_series * 100],
                  series_names=('Raw Data', 'SP Output'),
                  x_label='% Noise', y_label=y_label,
                  xlim=False, ylim=False,
                  out_path=os.path.join(base_path, file_name), show=True)

    print('*** All data saved in "{0}" ***'.format(base_path))
def base_experiment(log_dir, seed=123456789):
    """
    The base experiment.

    Sweeps the input noise from 0 to 1 in eleven steps. At each step a
    dataset is generated with SPDataset, a fresh SP is fitted on it, and the
    uniqueness and overlap of both the input and the SP's output are
    recorded (as percentages) and logged.

    @param log_dir: The full path to the log directory.

    @param seed: The random seed to use.

    @return: Tuple containing: SP uniqueness, input uniqueness, SP overlap,
    input overlap.
    """

    # Dataset dimensions
    nsamples, nbits, pct_active = 500, 100, 0.4

    # SP configuration
    sp_config = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,

        'nsynapses': 75,
        'seg_th': 15,

        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,

        'nepochs': 10,
        'log_dir': log_dir
    }

    np.random.seed(seed)

    # Result containers, one slot per noise level
    npoints = 11
    noise_levels = np.linspace(0, 1, npoints)
    u_sp = np.zeros(npoints)
    u_ip = np.zeros(npoints)
    o_sp = np.zeros(npoints)
    o_ip = np.zeros(npoints)

    metrics = SPMetrics()

    for idx, noise in enumerate(noise_levels):
        # Generate the inputs for this noise level
        dataset = SPDataset(nsamples=nsamples, nbits=nbits,
                            pct_active=pct_active, pct_noise=noise,
                            seed=seed)
        inputs = dataset.data

        # Input statistics
        u_ip[idx] = metrics.compute_uniqueness(inputs) * 100
        o_ip[idx] = metrics.compute_overlap(inputs) * 100

        # Fit a new region and collect its SDRs
        sp = SPRegion(**sp_config)
        sp.fit(inputs)
        sdrs = sp.predict(inputs)

        # SP statistics; the overlap is the mean of the overlap of the SDRs
        # and the overlap of their complement
        u_sp[idx] = metrics.compute_uniqueness(sdrs) * 100
        overlap_on = metrics.compute_overlap(sdrs)
        overlap_off = metrics.compute_overlap(np.logical_not(sdrs))
        o_sp[idx] = (overlap_on + overlap_off) * 50

        # Record everything in the region's log
        sp._log_stats('% Input Uniqueness', u_ip[idx])
        sp._log_stats('% Input Overlap', o_ip[idx])
        sp._log_stats('% SP Uniqueness', u_sp[idx])
        sp._log_stats('% SP Overlap', o_sp[idx])

    return u_sp, u_ip, o_sp, o_ip
def main(ds, p, ncols=2048, duty_cycle=100, nepochs=10,
         global_inhibition=True, seed=123456789):
    """
    Run an experiment.

    The SP's _phase3 method is replaced by the module's own _phase3 before
    the region is built. The region is then executed on every sample of the
    dataset for nepochs epochs, and the parameters together with the elapsed
    time are written to 'details.json' inside p.

    @param ds: The dataset.

    @param p: The full path to the directory to save the results.

    @param ncols: The number of columns.

    @param duty_cycle: The duty cycle.

    @param nepochs: The number of epochs

    @param global_inhibition: If True use global inhibition otherwise use
    local inhibition.

    @param seed: The random seed.

    @raise OSError: If the output directory cannot be created.
    """

    # Get some parameters; the density is measured on the first sample only
    ninputs = ds.shape[1]
    density = np.sum(ds[0]) / float(ninputs)

    # Make the directory if it doesn't exist
    try:
        os.makedirs(p)
    except OSError:
        # An existing directory is fine; any other failure (permissions,
        # a file in the way, ...) must not be hidden
        if not os.path.isdir(p):
            raise

    # Initializations
    np.random.seed(seed)
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ncols,
        'nsynapses': 40,
        'random_permanence': True,
        'pinc': 0.03,
        'pdec': 0.05,
        'seg_th': 15,
        'nactive': int(0.02 * ncols),
        'duty_cycle': duty_cycle,
        'max_boost': 10,
        'global_inhibition': global_inhibition,
        'trim': 1e-4
    }

    # Create the region, swapping in this module's _phase3 on the class
    delattr(SPRegion, '_phase3')
    setattr(SPRegion, '_phase3', _phase3)
    sp = SPRegion(**kargs)
    # Extra attributes set on the region: an iteration counter and the
    # output directory
    sp.iter, sp.out_path = 1, p

    # Train the region
    t = time.time()
    for _ in range(nepochs):
        for x in ds:
            sp.execute(x)
            sp.iter += 1
    t = time.time() - t

    # Dump the details
    kargs['density'] = density
    kargs['seed'] = seed
    kargs['nepochs'] = nepochs
    kargs['time'] = t
    with open(os.path.join(p, 'details.json'), 'wb') as f:
        f.write(json.dumps(kargs, sort_keys=True, indent=4,
                           separators=(',', ': ')))