def run_single_experiment(base_dir, ntrials=10, seed=123456789):
    """
    Run the actual experiment.

    @param base_dir: The directory containing the experiment to be run.
    @param ntrials: The number of trials to perform with different seeds.
    @param seed: The initial seed used to generate the other random seeds.
    """
    # Generate the requested number of seeds
    seeds = generate_seeds(ntrials, seed)

    # Get the configuration
    with open(os.path.join(base_dir, 'config.json'), 'r') as f:
        config = json.load(f)

    # Get the data and the base metric data
    with open(os.path.join(base_dir, 'dataset.pkl'), 'rb') as f:
        data = pickle.load(f)
        uniqueness_data, overlap_data, correlation_data = pickle.load(f)

    # Metrics
    metrics = SPMetrics()

    # Execute each run
    for s in seeds:
        # Update the seed
        config['seed'] = s

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(data)

        # Get the SP's output
        sp_output = sp.predict(data)

        # Log all of the metrics
        sp._log_stats('Input Uniqueness', uniqueness_data)
        sp._log_stats('Input Overlap', overlap_data)
        sp._log_stats('Input Correlation', correlation_data)
        sp._log_stats('SP Uniqueness', metrics.compute_uniqueness(sp_output))
        sp._log_stats('SP Overlap', metrics.compute_overlap(sp_output))
        sp._log_stats('SP Correlation',
                      1 - metrics.compute_distance(sp_output))
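# run_experiment (later in this listing) launches this script on the cluster
# as "python parameter_exploration.py <experiment dir> <ntrials> <seed>". A
# minimal command-line entry point matching that call is sketched below; the
# original module's argument handling is not shown here, so treat this guard
# as an assumption about how run_single_experiment is wired up.
if __name__ == '__main__':
    import sys

    # sys.argv: [script, base_dir, ntrials, seed]
    run_single_experiment(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))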
def local_experiment():
    """Run a single experiment, locally."""
    seed = 123456789
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 100,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': os.path.join(os.path.expanduser('~'), 'scratch',
                                'param_experiments', '1-1')
    }

    # Get the data
    nsamples, nbits, pct_active, pct_noise = 500, 100, 0.4, 0.15
    ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed)
    data = ds.data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Create the SP
    sp = SPRegion(**config)

    # Fit the SP
    sp.fit(data)

    # Get the SP's output
    sp_output = sp.predict(data)

    # Get the metrics for the SP's results
    sp_uniqueness = metrics.compute_uniqueness(sp_output)
    sp_overlap = metrics.compute_overlap(sp_output)
    sp_correlation = 1 - metrics.compute_distance(sp_output)

    # Log all of the metrics
    sp._log_stats('Input Uniqueness', uniqueness_data)
    sp._log_stats('Input Overlap', overlap_data)
    sp._log_stats('Input Correlation', correlation_data)
    sp._log_stats('SP Uniqueness', sp_uniqueness)
    sp._log_stats('SP Overlap', sp_overlap)
    sp._log_stats('SP Correlation', sp_correlation)

    print(f'Uniqueness:\t{uniqueness_data:2.4f}\t{sp_uniqueness:2.4f}')
    print(f'Overlap:\t{overlap_data:2.4f}\t{sp_overlap:2.4f}')
    print(f'Correlation:\t{correlation_data:2.4f}\t{sp_correlation:2.4f}')

    # Get a new random input
    ds2 = SPDataset(nsamples, nbits, pct_active, pct_noise, 123)
    pct_class_overlap = (float(np.dot(ds.input, ds2.input)) / nbits) * 100
    print(f'\n% Overlapping old class to new:\t{pct_class_overlap:2.4f}%')

    # Test the SP on the new dataset
    sp_output2 = sp.predict(ds2.data)

    # Get the average representation of the first result
    original_result = np.mean(sp_output, 0)
    original_result[original_result >= 0.5] = 1
    original_result[original_result < 1] = 0

    # Get averaged results for each metric type
    sp_uniqueness2 = 0.
    sp_overlap2 = 0.
    sp_correlation2 = 0.
    for item in sp_output2:
        test = np.vstack((original_result, item))
        sp_uniqueness2 += metrics.compute_uniqueness(test)
        sp_overlap2 += metrics.compute_overlap(test)
        sp_correlation2 += 1 - metrics.compute_distance(test)
    sp_uniqueness2 /= len(sp_output2)
    sp_overlap2 /= len(sp_output2)
    sp_correlation2 /= len(sp_output2)
    print(sp_uniqueness2, sp_overlap2, sp_correlation2)
def run_experiment(experiments, base_dir, nsamples=500, nbits=100,
                   pct_active=0.4, pct_noise=0.15, seed=123456789, ntrials=10,
                   partition_name='debug', this_dir=os.getcwd()):
    """
    Run an experiment for the SP. This experiment is used to vary various sets
    of parameters on the SP dataset. This function uses SLURM to conduct the
    experiments.

    @param experiments: A list containing the experiment details. Refer to one
        of the examples in this module for more details.
    @param base_dir: The base directory to use for logging.
    @param nsamples: The number of samples to add to the dataset.
    @param nbits: The number of bits each sample should have.
    @param pct_active: The percentage of bits that will be active in the base
        class SDR.
    @param pct_noise: The percentage of noise to add to the data.
    @param seed: The seed used to initialize the random number generator.
    @param ntrials: The number of parameter trials to use. Each iteration will
        be used to initialize the SP in a different manner.
    @param partition_name: The partition name of the cluster to use.
    @param this_dir: The full path to the directory where this file is located.
    """
    # Create the dataset
    data = SPDataset(nsamples, nbits, pct_active, pct_noise, seed).data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Prep each experiment for execution
    for experiment_name, time_limit, memory_limit, params in experiments:
        # Iterate through each type of inhibition
        for i, global_inhibition in enumerate((True, False)):
            # Get the base configuration
            base_config = create_base_config(base_dir, experiment_name,
                                             global_inhibition)

            # Add the parameters
            for param_name, param_value in params:
                base_config[param_name] = param_value
            config_gen = ConfigGenerator(base_config, ntrials)

            # Make the configurations
            for config in config_gen.get_config():
                # Make the base directory
                dir = config['log_dir']
                splits = os.path.basename(dir).split('-')
                base_name = '-'.join(s for s in splits[:-1])
                dir = os.path.join(os.path.dirname(dir), base_name)
                try:
                    os.makedirs(dir)
                except OSError:
                    pass

                # Dump the config as JSON
                s = json.dumps(config, sort_keys=True, indent=4,
                               separators=(',', ': ')).replace('},', '},\n')
                with open(os.path.join(dir, 'config.json'), 'w') as f:
                    f.write(s)

                # Dump the dataset and the metrics
                with open(os.path.join(dir, 'dataset.pkl'), 'wb') as f:
                    pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
                    pickle.dump(
                        (uniqueness_data, overlap_data, correlation_data),
                        f, pickle.HIGHEST_PROTOCOL)

                # Create the runner
                this_path = os.path.join(this_dir, 'parameter_exploration.py')
                command = 'python "{0}" "{1}" {2} {3}'.format(
                    this_path, dir, ntrials, seed)
                runner_path = os.path.join(dir, 'runner.sh')
                job_name = '{0}_{1}{2}'.format(
                    experiment_name, 'G' if global_inhibition else 'L',
                    base_name)
                stdio_path = os.path.join(dir, 'stdio.txt')
                stderr_path = os.path.join(dir, 'stderr.txt')
                create_runner(command=command, runner_path=runner_path,
                              job_name=job_name,
                              partition_name=partition_name,
                              stdio_path=stdio_path, stderr_path=stderr_path,
                              time_limit=time_limit[i],
                              memory_limit=memory_limit)

                # Execute the runner
                execute_runner(runner_path)
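# The structure of the "experiments" argument is only implied by the loop
# above, which unpacks (experiment_name, time_limit, memory_limit, params) and
# indexes time_limit by inhibition type (global first, then local). A
# hypothetical entry consistent with that unpacking is sketched below; the
# names, limits, and parameter values are illustrative placeholders, not the
# experiment sets actually used.
example_experiments = [
    # (name, (global-inhibition time limit, local-inhibition time limit),
    #  memory limit, [(parameter name, value), ...])
    ('nactive', ('00-01:00:00', '00-04:00:00'), 512, [('nepochs', 10)]),
]
# Example (hypothetical) invocation:
# run_experiment(example_experiments, os.path.join(os.path.expanduser('~'),
#                'scratch', 'param_experiments'))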
def main():
    """
    Program entry.

    Build an SP using SPDataset and see how it performs.
    """
    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    seed = 123456789
    base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(base_path, '1-1')
    }

    # Build items to store results
    npoints = 25
    pct_noises = np.linspace(0, 1, npoints, False)
    uniqueness_sp, uniqueness_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp, similarity_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp1, similarity_data1 = np.zeros(npoints), np.zeros(npoints)
    similarity_sp0, similarity_data0 = np.zeros(npoints), np.zeros(npoints)
    dissimilarity_sp, dissimilarity_data = np.zeros(npoints), np.zeros(npoints)
    overlap_sp, overlap_data = np.zeros(npoints), np.zeros(npoints)
    correlation_sp, correlation_data = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        print('Iteration {0} of {1}'.format(i + 1, npoints))

        # Build the dataset
        ds = SPDataset(nsamples=nsamples, nbits=nbits, pct_active=pct_active,
                       pct_noise=pct_noise, seed=seed)

        # Get the dataset stats
        uniqueness_data[i] = metrics.compute_uniqueness(ds.data)
        similarity_data[i] = metrics.compute_total_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data1[i] = metrics.compute_one_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data0[i] = metrics.compute_zero_similarity(
            ds.data, confidence_interval=0.9)
        dissimilarity_data[i] = metrics.compute_dissimilarity(
            ds.data, confidence_interval=0.9)
        overlap_data[i] = metrics.compute_overlap(ds.data)
        correlation_data[i] = 1 - metrics.compute_distance(ds.data)

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(ds.data)

        # Get the SP's output SDRs
        sp_output = sp.predict(ds.data)

        # Get the stats
        uniqueness_sp[i] = metrics.compute_uniqueness(sp_output)
        similarity_sp[i] = metrics.compute_total_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp1[i] = metrics.compute_one_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp0[i] = metrics.compute_zero_similarity(
            sp_output, confidence_interval=0.9)
        dissimilarity_sp[i] = metrics.compute_dissimilarity(
            sp_output, confidence_interval=0.9)
        overlap_sp[i] = metrics.compute_overlap(sp_output)
        correlation_sp[i] = 1 - metrics.compute_distance(sp_output)

    # Make some plots
    print('Showing uniqueness - 0% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [uniqueness_data * 100, uniqueness_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='Uniqueness [%]', xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'uniqueness.png'), show=True)
    print('Showing total similarity - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data * 100, similarity_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='Total similarity [%]', xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'similarity.png'), show=True)
    print('Showing similarity of "1" bits - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data1 * 100, similarity_sp1 * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label="Similarity of '1's [%]", xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'one_similarity.png'),
              show=True)
    print('Showing similarity of "0" bits - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data0 * 100, similarity_sp0 * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label="Similarity of '0's [%]", xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'zero_similarity.png'),
              show=True)
    print('Showing dissimilarity - 0% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [dissimilarity_data * 100, dissimilarity_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='Dissimilarity [%]', xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'dissimilarity.png'),
              show=True)
    print('Showing average normalized overlap - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [overlap_data * 100, overlap_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='% Normalized Overlap', xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'overlap.png'), show=True)
    print('Showing % average sample correlation coefficient - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [correlation_data * 100, correlation_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='% Correlation', xlim=False, ylim=(-5, 105),
              out_path=os.path.join(base_path, 'correlation.png'), show=True)
    print('*** All data saved in "{0}" ***'.format(base_path))
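# plot_line is imported from the package's plotting utilities and is not shown
# in this listing. For readers running these scripts standalone, a minimal
# stand-in consistent with how it is called above could look like the sketch
# below; this is an assumption for illustration, not the package's actual
# implementation.
import matplotlib.pyplot as plt


def plot_line_sketch(x_series, y_series, series_names=None, x_label=None,
                     y_label=None, xlim=False, ylim=False, out_path=None,
                     show=True):
    """Plot one line per (x, y) pair, optionally saving and showing it."""
    fig, ax = plt.subplots()
    names = series_names if series_names is not None \
        else [None] * len(x_series)
    for x, y, name in zip(x_series, y_series, names):
        ax.plot(x, y, label=name)
    if x_label:
        ax.set_xlabel(x_label)
    if y_label:
        ax.set_ylabel(y_label)
    if xlim:
        ax.set_xlim(xlim)
    if ylim:
        ax.set_ylim(ylim)
    if series_names is not None:
        ax.legend()
    if out_path:
        fig.savefig(out_path)
    if show:
        plt.show()
    plt.close(fig)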
def base_experiment(pct_noise=0.15, noverlap_bits=0, exp_name='1-1',
                    ntrials=10, verbose=True, seed=123456789):
    """
    Run a single experiment, locally.

    @param pct_noise: The percentage of noise to add to the dataset.
    @param noverlap_bits: The number of bits the base class should overlap
        with the novelty class.
    @param exp_name: The name of the experiment.
    @param ntrials: The number of times to repeat the experiment.
    @param verbose: If True, print the results.
    @param seed: The random seed to use.

    @return: A tuple containing the percentage errors for the SP's training
        and testing results and the SVM's training and testing results,
        respectively.
    """
    # Base parameters
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5
    log_dir = os.path.join(os.path.expanduser('~'), 'scratch',
                           'novelty_experiments', exp_name)

    # Configure the SP
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Create the base dataset
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Create the outlier dataset
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [x for x in range(nbits) if x not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)] = 1
    outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        # 40 = nbits * pct_active active bits per SDR
        print("\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.))
        print("Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.))
        print('Overlap between two classes: {0}'.format(
            np.dot(x_ds.base_class.astype('i'), outlier_base.astype('i'))))

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    c_x_tr = 1 - metrics.compute_distance(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    c_x_te = 1 - metrics.compute_distance(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)
    c_y_te = 1 - metrics.compute_distance(y_te)

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Iterate across the trials
    for i in range(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2
        config['log_dir'] = '{0}-{1}'.format(log_dir, i + 1)

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)
        c_sp_y_te = 1 - metrics.compute_distance(sp_y_te)

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Train Correlation', c_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Base Class Test Correlation', c_x_te)
        sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
        sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
        sp._log_stats('Input Novelty Class Test Correlation', c_y_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
        sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
        sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)
        sp._log_stats('SP Novelty Class Test Correlation', c_sp_y_te)

        # Print the results
        fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t' \
                '{6:2.4f}'
        if verbose:
            print('\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te')
            print(fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te,
                               u_sp_x_tr, u_sp_x_te, u_sp_y_te))
            print(fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te,
                               o_sp_x_tr, o_sp_x_te, o_sp_y_te))
            print(fmt_s.format('Correlation', c_x_tr, c_x_te, c_y_te,
                               c_sp_x_tr, c_sp_x_te, c_sp_y_te))

        # Get the average representation of the base class
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        c_sp_base_to_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Stack the base representation with each test sample
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the sums
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            o_sp_base_to_y_te += metrics.compute_overlap(yt)
            c_sp_base_to_y_te += 1 - metrics.compute_distance(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest
        c_sp_base_to_y_te /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Correlation',
                      c_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Correlation',
                      c_sp_base_to_y_te)

        # Print the results
        if verbose:
            print('\nDescription\tx_tr->x_te\tx_tr->y_te')
            print('Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te))
            print('Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te))
            print('Correlation:\t{0:2.4f}\t{1:2.4f}'.format(
                c_sp_base_to_x_te, c_sp_base_to_y_te))

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / \
            float(ntest) * 100
        svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / \
            float(ntest) * 100

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Stack the base representation with each test sample
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the accuracy
            xo = metrics.compute_overlap(xt)
            yo = metrics.compute_overlap(yt)
            if xo >= clf_th:
                clf_x_te += 1
            if yo < clf_th:
                clf_y_te += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[i] = 100 - clf_x_te
        sp_y_results[i] = 100 - clf_y_te
        svm_x_results[i] = 100 - svm_x_te
        svm_y_results[i] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        # Print the results
        if verbose:
            print('\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te))
            print('SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te))
            print('SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te))
            print('SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te))

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
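# base_experiment returns per-trial error arrays, so a small driver can sweep
# the amount of base/novelty overlap and summarize the mean errors. The sweep
# below is only a usage sketch; the overlap values, trial count, and report
# format are illustrative assumptions, not part of the original experiment
# set.
def overlap_sweep_sketch(noverlap_bits_values=(0, 10, 20, 30, 40)):
    """Run the novelty experiment for several base/novelty overlap settings."""
    for nbits_overlap in noverlap_bits_values:
        sp_x, sp_y, svm_x, svm_y = base_experiment(
            noverlap_bits=nbits_overlap,
            exp_name='overlap-{0}'.format(nbits_overlap),
            ntrials=3, verbose=False)
        print('{0} overlapping bits: SP error {1:.2f}% / {2:.2f}%, '
              'SVM error {3:.2f}% / {4:.2f}%'.format(
                  nbits_overlap, np.mean(sp_x), np.mean(sp_y),
                  np.mean(svm_x), np.mean(svm_y)))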
def main():
    """
    Program entry.

    Build an SP using SPDataset and see how it performs.
    """
    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    ncolumns = 300
    base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
    seed = 123456789
    kargs = {
        'ninputs': nbits,
        'ncolumns': ncolumns,
        'nactive': 0.02 * ncolumns,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 20,
        'seg_th': 2,
        'syn_th': 0.5,
        'pinc': 0.01,
        'pdec': 0.01,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 1,
        'log_dir': os.path.join(base_path, '1-1')
    }

    # Build items to store results
    npoints = 25
    pct_noises = np.linspace(0, pct_active / 2, npoints, False)
    uniqueness_sp, uniqueness_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp, similarity_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp1, similarity_data1 = np.zeros(npoints), np.zeros(npoints)
    similarity_sp0, similarity_data0 = np.zeros(npoints), np.zeros(npoints)
    dissimilarity_sp, dissimilarity_data = np.zeros(npoints), np.zeros(npoints)
    overlap_sp, overlap_data = np.zeros(npoints), np.zeros(npoints)
    correlation_sp, correlation_data = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        print('Iteration {0} of {1}'.format(i + 1, npoints))

        # Build the dataset
        ds = SPDataset(nsamples=nsamples, nbits=nbits, pct_active=pct_active,
                       pct_noise=pct_noise, seed=seed)

        # Get the dataset stats
        uniqueness_data[i] = metrics.compute_uniqueness(ds.data)
        similarity_data[i] = metrics.compute_total_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data1[i] = metrics.compute_one_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data0[i] = metrics.compute_zero_similarity(
            ds.data, confidence_interval=0.9)
        dissimilarity_data[i] = metrics.compute_dissimilarity(
            ds.data, confidence_interval=0.9)
        overlap_data[i] = metrics.compute_overlap(ds.data)
        correlation_data[i] = 1 - metrics.compute_distance(ds.data)

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(ds.data)

        # Get the SP's output SDRs
        sp_output = sp.predict(ds.data)

        # Get the stats
        uniqueness_sp[i] = metrics.compute_uniqueness(sp_output)
        similarity_sp[i] = metrics.compute_total_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp1[i] = metrics.compute_one_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp0[i] = metrics.compute_zero_similarity(
            sp_output, confidence_interval=0.9)
        dissimilarity_sp[i] = metrics.compute_dissimilarity(
            sp_output, confidence_interval=0.9)
        overlap_sp[i] = metrics.compute_overlap(sp_output)
        correlation_sp[i] = 1 - metrics.compute_distance(sp_output)

    # Make some plots
    print('Showing uniqueness - 0% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [uniqueness_data * 100, uniqueness_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='Uniqueness [%]', xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'uniqueness.png'), show=True)
    print('Showing total similarity - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data * 100, similarity_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='Total similarity [%]', xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'similarity.png'), show=True)
    print('Showing similarity of "1" bits - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data1 * 100, similarity_sp1 * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label="Similarity of '1's [%]", xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'one_similarity.png'),
              show=True)
    print('Showing similarity of "0" bits - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data0 * 100, similarity_sp0 * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label="Similarity of '0's [%]", xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'zero_similarity.png'),
              show=True)
    print('Showing dissimilarity - 0% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [dissimilarity_data * 100, dissimilarity_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='Dissimilarity [%]', xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'dissimilarity.png'),
              show=True)
    print('Showing average normalized overlap - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [overlap_data * 100, overlap_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='% Normalized Overlap', xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'overlap.png'), show=True)
    print('Showing % average sample correlation coefficient - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [correlation_data * 100, correlation_sp * 100],
              series_names=('Raw Data', 'SP Output'), x_label='% Noise',
              y_label='% Correlation', xlim=False, ylim=False,
              out_path=os.path.join(base_path, 'correlation.png'), show=True)
    print('*** All data saved in "{0}" ***'.format(base_path))
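# The module-level entry guard is not part of this listing; assuming main()
# above is the script being run directly, the conventional wiring would be:
if __name__ == '__main__':
    main()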
def base_experiment(config, ntrials=1, seed=123456789):
    """
    Run a single experiment, locally.

    @param config: The configuration parameters to use for the SP.
    @param ntrials: The number of times to repeat the experiment.
    @param seed: The random seed to use.

    @return: A tuple containing the percentage errors for the SP's training
        and testing results and the SVM's training and testing results,
        respectively.
    """
    # Base parameters
    ntrain, ntest = 800, 200
    clf_th = 0.5

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    tr_x_0 = np.random.permutation(tr_x[tr_y == 0])
    x_tr = tr_x_0[:ntrain]
    x_te = tr_x_0[ntrain:ntrain + ntest]
    outliers = [np.random.permutation(tr_x[tr_y == i])[:ntest]
                for i in range(1, 10)]

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    c_x_tr = 1 - metrics.compute_distance(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    c_x_te = 1 - metrics.compute_distance(x_te)
    u_y_te, o_y_te, c_y_te = [], [], []
    for outlier in outliers:
        u_y_te.append(metrics.compute_uniqueness(outlier))
        o_y_te.append(metrics.compute_overlap(outlier))
        c_y_te.append(1 - metrics.compute_distance(outlier))

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = [np.zeros(ntrials) for _ in range(9)]
    svm_x_results = np.zeros(ntrials)
    svm_y_results = [np.zeros(ntrials) for _ in range(9)]

    # Iterate across the trials
    for nt in range(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = [sp.predict(outlier) for outlier in outliers]

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
        u_sp_y_te, o_sp_y_te, c_sp_y_te = [], [], []
        for y in sp_y_te:
            u_sp_y_te.append(metrics.compute_uniqueness(y))
            o_sp_y_te.append(metrics.compute_overlap(y))
            c_sp_y_te.append(1 - metrics.compute_distance(y))

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Train Correlation', c_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Base Class Test Correlation', c_x_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
        for i, (a, b, c, d, e, f) in enumerate(zip(u_y_te, o_y_te, c_y_te,
                                                   u_sp_y_te, o_sp_y_te,
                                                   c_sp_y_te), 1):
            sp._log_stats('Input Novelty Class {0} Uniqueness'.format(i), a)
            sp._log_stats('Input Novelty Class {0} Overlap'.format(i), b)
            sp._log_stats('Input Novelty Class {0} Correlation'.format(i), c)
            sp._log_stats('SP Novelty Class {0} Uniqueness'.format(i), d)
            sp._log_stats('SP Novelty Class {0} Overlap'.format(i), e)
            sp._log_stats('SP Novelty Class {0} Correlation'.format(i), f)

        # Get the average representation of the base class
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp, o_sp, c_sp = np.zeros(9), np.zeros(9), np.zeros(9)
        for i, x in enumerate(sp_x_te):
            xt = np.vstack((sp_base_result, x))
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
            for j, yi in enumerate(sp_y_te):
                yt = np.vstack((sp_base_result, yi[i]))
                u_sp[j] += metrics.compute_uniqueness(yt)
                o_sp[j] += metrics.compute_overlap(yt)
                c_sp[j] += 1 - metrics.compute_distance(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp /= ntest
        o_sp /= ntest
        c_sp /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Correlation',
                      c_sp_base_to_x_te)
        for i, j in enumerate(range(1, 10)):
            sp._log_stats('Base Train to Novelty {0} Uniqueness'.format(j),
                          u_sp[i])
            sp._log_stats('Base Train to Novelty {0} Overlap'.format(j),
                          o_sp[i])
            sp._log_stats('Base Train to Novelty {0} Correlation'.format(j),
                          c_sp[i])

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / \
            float(ntest) * 100
        svm_y_te = np.array([len(np.where(clf.predict(outlier) == -1)[0]) /
                             float(ntest) * 100 for outlier in outliers])

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = np.zeros(9)
        for i, x in enumerate(sp_x_te):
            xt = np.vstack((sp_base_result, x))
            xo = metrics.compute_overlap(xt)
            if xo >= clf_th:
                clf_x_te += 1
            for j, yi in enumerate(sp_y_te):
                yt = np.vstack((sp_base_result, yi[i]))
                yo = metrics.compute_overlap(yt)
                if yo < clf_th:
                    clf_y_te[j] += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[nt] = 100 - clf_x_te
        svm_x_results[nt] = 100 - svm_x_te
        for j in range(9):
            sp_y_results[j][nt] = 100 - clf_y_te[j]
            svm_y_results[j][nt] = 100 - svm_y_te[j]

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        for i, j in enumerate(range(1, 10)):
            sp._log_stats('SP % Correct Novelty Class {0}'.format(j),
                          clf_y_te[i])
            sp._log_stats('SVM % Correct Novelty Class {0}'.format(j),
                          svm_y_te[i])
        sp._log_stats('SP % Mean Correct Novelty Class', np.mean(clf_y_te))
        sp._log_stats('SVM % Mean Correct Novelty Class', np.mean(svm_y_te))
        sp._log_stats('SP % Adjusted Score',
                      (np.mean(clf_y_te) * clf_x_te) / 100)
        sp._log_stats('SVM % Adjusted Score',
                      (np.mean(svm_y_te) * svm_x_te) / 100)

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
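# The MNIST variant of base_experiment expects the SP configuration to be
# passed in rather than built locally. A plausible configuration, modeled on
# the dictionaries used elsewhere in this listing, is sketched below; aside
# from 'ninputs' matching the 784-pixel MNIST images, every value here is an
# illustrative assumption rather than a published parameter set.
def mnist_config_sketch(log_dir):
    """Build an example SP configuration for the MNIST novelty experiment."""
    return {
        'ninputs': 784,  # 28 x 28 MNIST images
        'trim': 1e-4,
        'disable_boost': True,
        'seed': 123456789,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 784,
        'nactive': 78,
        'nsynapses': 100,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': log_dir
    }

# Example (hypothetical) invocation:
# results = base_experiment(mnist_config_sketch(os.path.join(
#     os.path.expanduser('~'), 'scratch', 'mnist_novelty', '1-1')), ntrials=1)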