def main2():
    """
    Use one class SVM for multi-class classification

    One linear one-class SVM is fit per digit. Each test sample is assigned
    to the digit whose SVM produces the largest decision value (after a small
    uniform random offset is added), and the resulting accuracy is printed.

    Accuracy = 71.45%
    """

    # Initializations
    seed = 123456789
    np.random.seed(seed)
    ntrain, ntest = 800, 200
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    # Draw a random subset of every digit. The training subset is drawn
    # before the testing subset so the random stream is consumed in a fixed
    # order.
    tr, te = [], []
    for digit in xrange(10):
        shuffled_tr = np.random.permutation(tr_x[tr_y == digit])
        tr.append(shuffled_tr[:ntrain])
        shuffled_te = np.random.permutation(te_x[te_y == digit])
        te.append(shuffled_te[:ntest])

    # Train one classifier per digit
    clfs = []
    for digit_data in tr:
        svm = OneClassSVM(kernel='linear', nu=0.1, random_state=seed)
        svm.fit(digit_data)
        clfs.append(svm)

    # Build the testing set; labels follow the order the digits were stacked
    te_x = np.vstack(te)
    te_y = np.repeat(np.arange(10), ntest)
    nsamples = len(te_y)

    # One row of scores per classifier
    results = np.zeros((10, nsamples))
    for row, svm in enumerate(clfs):
        scores = svm.decision_function(te_x).flatten()
        results[row] = scores + np.random.uniform(0.1, 0.2, nsamples)

    # The predicted digit is the classifier with the largest score
    ncorrect = np.sum(np.argmax(results, 0) == te_y)
    print(ncorrect / float(nsamples))
def full_cv(base_dir):
    """
    Run the MNIST experiment once for every CV split.

    @param base_dir: The full path to the base directory. This directory
    should contain the config ('config.json') as well as the pickled CV
    splits ('cv.pkl').
    """

    # Build the SP's keyword arguments from the stored configuration
    config_path = os.path.join(base_dir, 'config.json')
    with open(config_path, 'r') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Merge the MNIST training and testing sets; the CV splits index into
    # the merged arrays
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Load the CV splits
    cv_path = os.path.join(base_dir, 'cv.pkl')
    with open(cv_path, 'rb') as f:
        cv = pickle.load(f)

    # Train and evaluate a fresh region on every split
    for tr, te in cv:
        region = SPRegion(**kargs)
        region.fit(x[tr], y[tr])

        # Column accuracy
        region.score(x[te], y[te])

        # Probabilistic accuracy, then the dimensionality reduction method
        for method in ('prob', 'reduction'):
            region.score(x[te], y[te], tr_x=x[tr], score_method=method)

        # Record how many dimensions the reduced representation has
        ndims = len(region.reduce_dimensions(x[0]))
        region._log_stats('Number of New Dimensions', ndims)
def main():
    """
    Use a linear SVM for multi-class classification.

    For every CV split three classifiers are fit and their test accuracy is
    printed, in this order: one-vs-rest, plain LinearSVC, one-vs-one.

    One vs the rest : 77.61%
    Default         : 77.61%
    One vs one      : 85.07%
    """

    seed = 123456789
    np.random.seed(seed)
    ntrain, ntest = 800, 200
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, 1, seed)

    for tr, te in cv:
        # n_jobs is passed by keyword: recent scikit-learn releases make it
        # keyword-only, while older releases accept either form.
        classifiers = (
            OneVsRestClassifier(LinearSVC(random_state=seed), n_jobs=-1),
            LinearSVC(random_state=seed),
            OneVsOneClassifier(LinearSVC(random_state=seed), n_jobs=-1),
        )
        for clf in classifiers:
            clf.fit(x[tr], y[tr])
            print(clf.score(x[te], y[te]))
def score_grid():
    """
    Classify with the gridded SP.

    Every 3x3 patch of a 28x28 image is fed to its own first-level SP. The
    concatenated first-level outputs are then weighted by the second-level
    SP's permanences and averaged per feature. A linear SVM is trained on the
    resulting features and its test accuracy is printed.
    """

    # Build the path with os.path.join; a hard-coded backslash separator only
    # resolves to a directory on Windows.
    p = os.path.join('results', 'mnist_filter')
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    # Get the SPs
    # NOTE(review): os.listdir() returns names in arbitrary order, yet the
    # SPs are paired with the patches positionally below -- confirm that the
    # listing order matches the intended window order.
    sps = [load(os.path.join(p, name)) for name in os.listdir(p)
           if name[2] == '0']
    sp2 = load(os.path.join(p, 'sp1-0.pkl'))

    nwindows = 26 ** 2
    nfeat = 100 * nwindows

    # 'data.pkl' holds the tuple (w, ms), previously produced with
    #   w = [sp2.p[sp2.syn_map == j] for j in xrange(nfeat)]
    #   ms = max(wi.shape[0] for wi in w)
    # Only ms, the width used for the padded per-feature rows, is needed.
    with open(os.path.join(p, 'data.pkl'), 'rb') as f:
        _, ms = cPickle.load(f)

    def _extract_features(data):
        """Return the (nsamples, nfeat) feature matrix for the images."""
        features = np.zeros((data.shape[0], nfeat))
        for i, image in enumerate(data):
            patches = extract_patches_2d(image.reshape(28, 28),
                                         (3, 3)).reshape(nwindows, 9)

            # Run every patch through its SP and gather the outputs
            active = np.zeros(nfeat, dtype='bool')
            for j, (patch, sp) in enumerate(izip(patches, sps)):
                sp.step(patch)
                active[j * 100:(j * 100) + 100] = sp.y[:, 0]

            # Weight the second-level permanences by the active inputs
            y = sp2.p * active[sp2.syn_map]

            # One zero-padded row per feature; the mean is taken over the
            # padded width, ms.
            w = np.zeros((nfeat, ms))
            for j in xrange(nfeat):
                a = y[sp2.syn_map == j]
                w[j][:a.shape[0]] = a
            features[i] = np.mean(w, 1)
        return features

    # Get training data, then testing data (the SPs are stepped in this order)
    tr_x2 = _extract_features(tr_x)
    te_x2 = _extract_features(te_x)

    # Classify
    clf = LinearSVC(random_state=123456789)
    clf.fit(tr_x2, tr_y)
    print('SVM Accuracy : {0:2.2f} %'.format(clf.score(te_x2, te_y) * 100))
def full_mnist(base_dir, new_dir, auto_update=False):
    """
    Execute a full MNIST run using the parameters stored in the config file.

    @param base_dir: The full path to the base directory. This directory
    should contain the config ('config.json').

    @param new_dir: The full path of where the data should be saved.

    @param auto_update: If True the permanence increment and decrement
    amounts will automatically be computed by the runner. If False, the ones
    specified in the config file will be used.
    """

    # Build the SP's keyword arguments from the stored configuration
    config_path = os.path.join(base_dir, 'config.json')
    with open(config_path, 'rb') as f:
        kargs = json.load(f)
    kargs['log_dir'] = new_dir
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    # Derive the permanence update amounts from the training data
    if auto_update:
        # Mean of the per-instance sums over the training set
        mean_sum = tr_x.sum(1).mean()

        # The mean sum as a fraction of the number of inputs
        active_prob = mean_sum / float(tr_x.shape[1])

        # Both update amounts share this scaling factor
        scale = 1 / mean_sum

        # Override the configured amounts
        kargs['pinc'] = scale * (1 / active_prob)
        kargs['pdec'] = scale * (1 / (1 - active_prob))

    # Train the region on the full training set
    region = SPRegion(**kargs)
    region.fit(tr_x, tr_y)

    # Column accuracy
    region.score(te_x, te_y)

    # Probabilistic accuracy, then the dimensionality reduction method
    for method in ('prob', 'reduction'):
        region.score(te_x, te_y, tr_x=tr_x, score_method=method)

    # Record how many dimensions the reduced representation has
    ndims = len(region.reduce_dimensions(tr_x[0]))
    region._log_stats('Number of New Dimensions', ndims)
def score_grid():
    """
    Classify with the gridded SP.

    Each image is cut into 3x3 patches, every patch is stepped through its
    own first-level SP, and the combined outputs are weighted by the
    second-level SP's permanences and averaged per feature. The features are
    used to train a linear SVM whose test accuracy is printed.
    """

    # Use os.path.join so the directory also resolves on non-Windows systems
    # (the previous literal used a backslash separator).
    p = os.path.join('results', 'mnist_filter')
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    # Get the SPs
    # NOTE(review): the SPs are matched to patches by position, but
    # os.listdir() makes no ordering guarantee -- verify the pairing.
    sps = [load(os.path.join(p, fname)) for fname in os.listdir(p)
           if fname[2] == '0']
    sp2 = load(os.path.join(p, 'sp1-0.pkl'))

    nwindows = 26 ** 2
    nfeat = 100 * nwindows

    # 'data.pkl' stores (w, ms) where, at creation time,
    #   w = [sp2.p[sp2.syn_map == j] for j in xrange(nfeat)]
    #   ms = max(wi.shape[0] for wi in w)
    # Only the padded row width, ms, is used here.
    with open(os.path.join(p, 'data.pkl'), 'rb') as f:
        _, ms = cPickle.load(f)

    def _build_features(images):
        """Compute one nfeat-long feature vector per image."""
        out = np.zeros((images.shape[0], nfeat))
        for row, image in enumerate(images):
            windows = extract_patches_2d(image.reshape(28, 28),
                                         (3, 3)).reshape(nwindows, 9)

            # First level: one SP per window, 100 outputs each
            level1 = np.zeros(nfeat, dtype='bool')
            for k, (window, sp) in enumerate(izip(windows, sps)):
                sp.step(window)
                level1[k * 100:(k * 100) + 100] = sp.y[:, 0]

            # Second level: permanences gated by the first-level outputs
            y = sp2.p * level1[sp2.syn_map]

            # Zero-padded rows, so the mean below is over ms entries
            padded = np.zeros((nfeat, ms))
            for k in xrange(nfeat):
                a = y[sp2.syn_map == k]
                padded[k][:a.shape[0]] = a
            out[row] = np.mean(padded, 1)
        return out

    # Training data is processed before testing data, as the SPs are stepped
    # on every sample
    tr_x2 = _build_features(tr_x)
    te_x2 = _build_features(te_x)

    # Classify
    clf = LinearSVC(random_state=123456789)
    clf.fit(tr_x2, tr_y)
    print('SVM Accuracy : {0:2.2f} %'.format(clf.score(te_x2, te_y) * 100))
def full_mnist(base_dir, new_dir, auto_update=False):
    """
    Execute a full MNIST run using the parameters stored in the config file.

    @param base_dir: The full path to the base directory. This directory
    should contain the config ('config.json').

    @param new_dir: The full path of where the data should be saved.

    @param auto_update: If True the permanence increment and decrement
    amounts will automatically be computed by the runner. If False, the ones
    specified in the config file will be used.
    """

    # Read the configuration and add the run-specific entries
    with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
        kargs = json.load(f)
    kargs['log_dir'] = new_dir
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    if auto_update:
        # Average, over the training set, of each instance's sum
        avg_total = tr_x.sum(1).mean()

        # That average expressed as a fraction of the input size
        p_active = avg_total / float(tr_x.shape[1])

        # Common scaling factor for both update amounts
        factor = 1 / avg_total

        # Replace the configured amounts with the computed ones
        kargs['pinc'] = factor * (1 / p_active)
        kargs['pdec'] = factor * (1 / (1 - p_active))

    # Fit on the full training set
    sp = SPRegion(**kargs)
    sp.fit(tr_x, tr_y)

    # Column accuracy
    sp.score(te_x, te_y)

    # Probabilistic accuracy
    sp.score(te_x, te_y, tr_x=tr_x, score_method='prob')

    # Dimensionality reduction method
    sp.score(te_x, te_y, tr_x=tr_x, score_method='reduction')

    # Log the size of the reduced representation
    sp._log_stats('Number of New Dimensions',
                  len(sp.reduce_dimensions(tr_x[0])))
def main3(log_dir):
    """
    Use one class SP for multi-class classification

    One SP is trained per digit (in parallel, via _main3). Every trained SP
    then scores the whole test set (via _main3_2); a sample is assigned to
    the digit with the largest score and the accuracy is printed.

    Accuracy = 49.8%

    @param log_dir: The directory passed to the SPs as 'log_dir'.
    """

    # Initializations
    seed = 123456789
    np.random.seed(seed)
    ntrain, ntest = 800, 200
    (tr_x, tr_y), (te_x, te_y) = load_mnist()

    # Draw a random subset of every digit; training is drawn before testing
    # so the random stream is consumed in a fixed order
    tr, te = [], []
    for digit in xrange(10):
        tr.append(np.random.permutation(tr_x[tr_y == digit])[:ntrain])
        te.append(np.random.permutation(te_x[te_y == digit])[:ntest])

    params = {
        'ninputs': 784,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,

        'global_inhibition': True,

        'ncolumns': 784,
        'nactive': 78,

        'nsynapses': 100,
        'seg_th': 0,

        'syn_th': 0.5,

        'pinc': 0.001,
        'pdec': 0.001,

        'nepochs': 10,

        'log_dir': log_dir
    }
    # Not referenced below; retained so the function does exactly what it
    # did before
    metrics = SPMetrics()

    # Train the classifiers; each job yields (classifier, base result)
    trained = Parallel(n_jobs=-1)(
        delayed(_main3)(params, digit_data) for digit_data in tr)
    clfs = [pair[0] for pair in trained]
    base_results = [pair[1] for pair in trained]

    # Build the testing set; labels follow the order the digits were stacked
    te_x = np.vstack(te)
    te_y = np.repeat(np.arange(10), ntest)

    # One row of scores per classifier
    results = np.array(Parallel(n_jobs=-1)(
        delayed(_main3_2)(clf, te_x, base, seed)
        for clf, base in zip(clfs, base_results)))

    ncorrect = np.sum(np.argmax(results, 0) == te_y)
    print(ncorrect / float(len(te_y)))
def one_cv(base_dir, cv_split):
    """
    Run the MNIST experiment. Only the specified CV split is executed.

    @param base_dir: The full path to the base directory. This directory
    should contain the config ('config-<cv_split>.json') as well as the
    pickled CV splits ('cv.pkl').

    @param cv_split: The index for the CV split. The split used is
    cv[cv_split - 1].
    """

    # Build the SP's keyword arguments from the split's configuration
    config_name = 'config-{0}.json'.format(cv_split)
    with open(os.path.join(base_dir, config_name), 'rb') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Merge the MNIST training and testing sets; the CV splits index into
    # the merged arrays
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Pick out the requested split
    with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
        cv = cPickle.load(f)
    tr, te = cv[cv_split - 1]

    # Remove the split directory, if it exists (errors are ignored)
    split_dir = os.path.join(base_dir, str(cv_split))
    shutil.rmtree(split_dir, True)

    # Train the region
    region = SPRegion(**kargs)
    region.fit(x[tr], y[tr])

    # Column accuracy
    region.score(x[te], y[te])

    # Probabilistic accuracy, then the dimensionality reduction method
    for method in ('prob', 'reduction'):
        region.score(x[te], y[te], tr_x=x[tr], score_method=method)

    # Record how many dimensions the reduced representation has
    ndims = len(region.reduce_dimensions(x[0]))
    region._log_stats('Number of New Dimensions', ndims)
def one_cv(base_dir, cv_split):
    """
    Run the MNIST experiment. Only the specified CV split is executed.

    @param base_dir: The full path to the base directory. This directory
    should contain the config ('config-<cv_split>.json') as well as the
    pickled CV splits ('cv.pkl').

    @param cv_split: The index for the CV split. The split used is
    cv[cv_split - 1].
    """

    # Read the configuration belonging to this split
    config_path = os.path.join(base_dir,
                               'config-{0}.json'.format(cv_split))
    with open(config_path, 'rb') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Get the data as one merged set
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Get the indexes of the requested split
    cv_path = os.path.join(base_dir, 'cv.pkl')
    with open(cv_path, 'rb') as f:
        cv = cPickle.load(f)
    tr, te = cv[cv_split - 1]

    # Remove the split directory, if it exists (errors are ignored)
    shutil.rmtree(os.path.join(base_dir, str(cv_split)), True)

    # Execute
    sp = SPRegion(**kargs)
    sp.fit(x[tr], y[tr])

    # Column accuracy
    sp.score(x[te], y[te])

    # Probabilistic accuracy
    sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')

    # Dimensionality reduction method
    sp.score(x[te], y[te], tr_x=x[tr], score_method='reduction')

    # Log the size of the reduced representation
    sp._log_stats('Number of New Dimensions',
                  len(sp.reduce_dimensions(x[0])))
def full_cv(base_dir):
    """
    Run the MNIST experiment. Each CV split is executed sequentially.

    @param base_dir: The full path to the base directory. This directory
    should contain the config ('config.json') as well as the pickled CV
    splits ('cv.pkl').
    """

    # Build the SP's keyword arguments from the stored configuration
    config_path = os.path.join(base_dir, 'config.json')
    with open(config_path, 'rb') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Merge the MNIST training and testing sets; the CV splits index into
    # the merged arrays
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Load the CV splits
    cv_path = os.path.join(base_dir, 'cv.pkl')
    with open(cv_path, 'rb') as f:
        cv = cPickle.load(f)

    # Train and evaluate a fresh region on every split
    for tr, te in cv:
        region = SPRegion(**kargs)
        region.fit(x[tr], y[tr])

        # Column accuracy
        region.score(x[te], y[te])

        # Probabilistic accuracy, then the dimensionality reduction method
        for method in ('prob', 'reduction'):
            region.score(x[te], y[te], tr_x=x[tr], score_method=method)

        # Record how many dimensions the reduced representation has
        ndims = len(region.reduce_dimensions(x[0]))
        region._log_stats('Number of New Dimensions', ndims)
def main(ntrain=800, ntest=200, nsplits=1, seed=1234567):
    """
    Train an SP on MNIST and plot its reconstruction of one input per digit.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use.

    @param seed: The seed for the random number generators.
    """

    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 10,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,

        'disable_boost': True,

        'nsynapses': 392,
        'seg_th': 10,

        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,

        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data as one merged set
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold
    for tr, te in cv:
        # Create and train the region
        sp = SPRegion(**kargs)
        sp.fit(x[tr], y[tr])

        # Fit the base classifier (its score is not used here)
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])

        # Pick one random training sample for each digit
        inputs = np.zeros((10, ninputs))
        for digit in xrange(10):
            candidates = np.where(y[tr] == digit)[0]
            chosen = np.random.permutation(candidates)[0]
            inputs[digit] = x[tr][chosen]

        # Get the SP's predictions for the inputs
        sp_pred = sp.predict(inputs)

        # Get the reconstruction in the context of the SP
        sp_inputs = sp.reconstruct_input(sp_pred)

        # Make a plot comparing the images
        out_file = os.path.join(sp.log_dir, 'input_reconstruction.png')
        plot_compare_images((inputs, sp_pred, sp_inputs), (28, 28),
                            out_path=out_file)
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """
    Run a simple MNIST classification task and plot input reconstructions.

    For every CV split the accuracy of a plain linear SVM and of the SP's
    three scoring methods is printed, followed by a plot comparing one input
    per digit with the SP's reconstruction of it.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use.

    @param seed: The seed for the random number generators.
    """

    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 20,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,

        'max_boost': 3,
        'duty_cycle': 8,

        'nsynapses': 392,
        'seg_th': 2,

        'syn_th': 0.5,
        'pinc': 0.01,
        'pdec': 0.02,
        'pwindow': 0.5,
        'random_permanence': True,

        'nepochs': 1,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Get the data as one merged set
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for tr, te in cv:
        # Create and train the region
        sp = SPRegion(**kargs)
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])
        accuracy = clf.score(x[te], y[te])
        print('SVM Only Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Test the region for the column method
        accuracy = sp.score(x[te], y[te])
        print('Column Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Test the region for the probabilistic method
        accuracy = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
        print('Probabilistic Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Test the region for the dimensionality reduction method
        accuracy = sp.score(x[te], y[te], tr_x=x[tr],
                            score_method='reduction')
        ndims = len(sp.reduce_dimensions(x[0]))
        print('Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
            ninputs, ndims, ninputs / float(ndims)))
        print('Reduction Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Pick one random training sample for each digit
        inputs = np.zeros((10, ninputs))
        for digit in xrange(10):
            candidates = np.where(y[tr] == digit)[0]
            chosen = np.random.permutation(candidates)[0]
            inputs[digit] = x[tr][chosen]

        # Get the SP's predictions for the inputs
        sp_pred = sp.predict(inputs)

        # Get the reconstruction in the context of the SP
        sp_inputs = sp.reconstruct_input(sp_pred)

        # Make a plot comparing the two; both rows are labelled 0-9
        x1_labels = [str(digit) for digit in xrange(10)]
        x2_labels = list(x1_labels)
        title = 'Input Reconstruction: Original (top), SP (bottom)'
        out_file = os.path.join(sp.log_dir, 'input_reconstruction.png')
        plot_compare_images((inputs, sp_inputs), (28, 28), title,
                            (x1_labels, x2_labels,), out_file)
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """
    Run a simple MNIST classification task.

    For every CV split the accuracy of a plain linear SVM and of the SP's
    three scoring methods is printed, followed by a plot comparing one input
    per digit with the SP's output and the SP's reconstruction.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use.

    @param seed: The seed for the random number generators.
    """

    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 30,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,

        'disable_boost': True,

        'nsynapses': 392,
        'seg_th': 10,

        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,

        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data as one merged set
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for tr, te in cv:
        # Create and train the region
        sp = SPRegion(**kargs)
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])
        accuracy = clf.score(x[te], y[te])
        print('SVM Only Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Test the region for the column method
        accuracy = sp.score(x[te], y[te])
        print('Column Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Test the region for the probabilistic method
        accuracy = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
        print('Probabilistic Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Test the region for the dimensionality reduction method
        accuracy = sp.score(x[te], y[te], tr_x=x[tr],
                            score_method='reduction')
        ndims = len(sp.reduce_dimensions(x[0]))
        print('Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
            ninputs, ndims, ninputs / float(ndims)))
        print('Reduction Accuracy: {0:.2f}%'.format(accuracy * 100))

        # Pick one random training sample for each digit
        inputs = np.zeros((10, ninputs))
        for digit in range(10):
            candidates = np.where(y[tr] == digit)[0]
            chosen = np.random.permutation(candidates)[0]
            inputs[digit] = x[tr][chosen]

        # Get the SP's predictions for the inputs
        sp_pred = sp.predict(inputs)

        # Get the reconstruction in the context of the SP
        sp_inputs = sp.reconstruct_input(sp_pred)

        # Make a plot comparing the images
        title = 'Input Reconstruction: Original (top), SP SDRs (middle), ' \
            'SP Reconstruction (bottom)'
        out_file = os.path.join(sp.log_dir, 'input_reconstruction.png')
        plot_compare_images((inputs, sp_pred, sp_inputs), (28, 28), title,
                            out_path=out_file)
def main(log_dir, ntrain=800, ntest=200, niter=10, nsplits=5,
         global_inhibition=True, seed=None):
    """
    Build the information needed to perform CV on a subset of the MNIST
    dataset.

    @param log_dir: The directory to store the results in.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param niter: The number of parameter iterations to use.

    @param nsplits: The number of splits of the data to use.

    @param global_inhibition: If True use global inhibition; otherwise, use
    local inhibition.

    @param seed: The seed for the random number generators.

    @return: The full set of X, the full set of Y, the keyword arguments for
    the classifier, the params for CV, and the CV.
    """

    # Get the data as one merged set, along with the CV splits
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x = np.vstack((tr_x, te_x))
    y = np.hstack((tr_y, te_y))
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Parameters that stay fixed across the search
    ninputs = tr_x.shape[1]
    kargs = {
        # Region parameters
        'ninputs': ninputs,
        'global_inhibition': global_inhibition,
        'trim': 1e-4,
        'seed': seed,

        # Synapse parameters
        'syn_th': 0.5,
        'random_permanence': True,

        # Fitting parameters
        'nepochs': 30,
        # NOTE: The SVM's will be identical, despite being seeded now
        'clf': LinearSVC(random_state=seed)
    }

    # Parameters to search over
    param_distributions = {
        # Region parameters
        'ncolumns': randint(100, 1001),
        # As a percentage of the number of columns
        'nactive': uniform(0, 0.2),

        # Column parameters
        'max_boost': randint(1, 21),
        'duty_cycle': randint(10, 1001),

        # Segment parameters
        'nsynapses': randint(1, ninputs + 1),
        # As a percentage of the number of synapses
        'seg_th': uniform(0, 0.1),

        # Synapse parameters
        'pinc': uniform(0.001, 0.1),
        'pdec': uniform(0.001, 0.1),
        'pwindow': uniform(0.001, 0.1),

        # Fitting parameters
        'log_dir': log_dir
    }

    # Every searched parameter is served by the same generator instance
    gen = ParamGenerator(param_distributions, niter, nsplits, ninputs)
    params = dict.fromkeys(param_distributions, gen)

    return x, y, kargs, params, cv
def first_level(log_dir, ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """
    Train one SP per image window on a subset of MNIST.

    Each 28x28 image is cut into windows (via get_windows), one SP is created
    per window position and all SPs are executed in parallel. The labels of
    the selected samples are pickled to 'labels.pkl' inside log_dir.

    @param log_dir: The directory to store the results in. It is created if
    it does not exist yet.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use. Only the first
    split is consumed.

    @param seed: The seed for the random number generators.
    """

    # Details of the filter
    win_size = 7
    total_win_size = win_size * win_size
    nwindows = 16

    # SP arguments
    kargs = {
        'ninputs': total_win_size,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,

        'nsynapses': 35,
        'seg_th': 5,

        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,

        'nepochs': 10,
        'log_dir': os.path.join(log_dir, '1-1')
    }

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV. The builtin next() is used instead of the
    # Python 2-only .next() method; it works on both major versions.
    tr, te = next(MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed).gen)
    tr, te = tr[:ntrain], te[:ntest]

    # The labels are written before any SP exists, so make sure the
    # directory is there.
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # Store the labels to disk
    with open(os.path.join(log_dir, 'labels.pkl'), 'wb') as f:
        cPickle.dump((y[tr], y[te]), f, cPickle.HIGHEST_PROTOCOL)
    del tr_y
    del te_y
    del y

    def _windowed(images, indexes, nsamples):
        """Return a (nwindows, nsamples, total_win_size) boolean array."""
        data = np.zeros((nwindows, nsamples, total_win_size), dtype='bool')
        for i in xrange(nsamples):
            image = images[indexes[i]].reshape(28, 28)
            for j, window in enumerate(get_windows(image, win_size)):
                data[j, i] = window
        return data

    # Build the training and testing data
    train_data = _windowed(x, tr, ntrain)
    test_data = _windowed(x, te, ntest)

    # The raw images are no longer needed; release them before the SPs run
    del tr_x
    del te_x
    del x

    # Make the SPs
    sps = [SPRegion(**kargs) for _ in xrange(nwindows)]

    # Execute the SPs in parallel, one per window position
    Parallel(n_jobs=-1)(
        delayed(execute)(sp, win_tr, win_te)
        for sp, win_tr, win_te in izip(sps, train_data, test_data))
def base_experiment(config, ntrials=1, seed=123456789):
    """
    Run a single experiment, locally.

    Digit 0 is the base class and digits 1-9 are the novelty classes. For
    every trial an SP and a one-class SVM are fit on the base class only and
    then asked to accept held-out base samples and to reject the novelty
    samples. All intermediate metrics are logged through the SP.

    @param config: The configuration parameters to use for the SP. Note that
    config['seed'] is overwritten on every trial.

    @param ntrials: The number of times to repeat the experiment.

    @param seed: The random seed to use.

    @return: A tuple (sp_x, sp_y, svm_x, svm_y) of percentage errors. sp_x
    and svm_x are arrays with one base class error per trial. sp_y and svm_y
    are lists holding one array per novelty class (digits 1-9, in order),
    each with one error per trial.
    """

    # Base parameters
    ntrain, ntest = 800, 200
    clf_th = 0.5

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    tr_x_0 = np.random.permutation(tr_x[tr_y == 0])
    x_tr = tr_x_0[:ntrain]
    x_te = tr_x_0[ntrain:ntrain + ntest]
    outliers = [np.random.permutation(tr_x[tr_y == i])[:ntest]
                for i in xrange(1, 10)]

    # Metrics
    metrics = SPMetrics()

    def _summarize(data):
        """Return (uniqueness, overlap, 1 - distance) for the samples."""
        return (metrics.compute_uniqueness(data),
                metrics.compute_overlap(data),
                1 - metrics.compute_distance(data))

    def _log_triplet(sp, prefix, values):
        """Log a (uniqueness, overlap, correlation) triplet under prefix."""
        names = ('Uniqueness', 'Overlap', 'Correlation')
        for name, value in zip(names, values):
            sp._log_stats('{0} {1}'.format(prefix, name), value)

    # Get the metrics for the datasets
    input_tr = _summarize(x_tr)
    input_te = _summarize(x_te)
    input_novel = [_summarize(outlier) for outlier in outliers]

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = [np.zeros(ntrials) for _ in xrange(9)]
    svm_x_results = np.zeros(ntrials)
    svm_y_results = [np.zeros(ntrials) for _ in xrange(9)]

    # Iterate across the trials:
    for nt in xrange(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = [sp.predict(outlier) for outlier in outliers]

        # Get the metrics for the SP's results
        sp_tr = _summarize(sp_x_tr)
        sp_te = _summarize(sp_x_te)
        sp_novel = [_summarize(y) for y in sp_y_te]

        # Log all of the metrics
        _log_triplet(sp, 'Input Base Class Train', input_tr)
        _log_triplet(sp, 'Input Base Class Test', input_te)
        _log_triplet(sp, 'SP Base Class Train', sp_tr)
        _log_triplet(sp, 'SP Base Class Test', sp_te)
        for i, (in_vals, sp_vals) in enumerate(zip(input_novel, sp_novel),
                                               1):
            _log_triplet(sp, 'Input Novelty Class {0}'.format(i), in_vals)
            _log_triplet(sp, 'SP Novelty Class {0}'.format(i), sp_vals)

        # Get average representation of the base class, binarized at 0.5
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp, o_sp, c_sp = np.zeros(9), np.zeros(9), np.zeros(9)
        for i, x in enumerate(sp_x_te):
            u, o, c = _summarize(np.vstack((sp_base_result, x)))
            u_sp_base_to_x_te += u
            o_sp_base_to_x_te += o
            c_sp_base_to_x_te += c

            # The i-th sample of every novelty class is compared as well
            for j, yi in enumerate(sp_y_te):
                u, o, c = _summarize(np.vstack((sp_base_result, yi[i])))
                u_sp[j] += u
                o_sp[j] += o
                c_sp[j] += c
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp /= ntest
        o_sp /= ntest
        c_sp /= ntest

        # Log the results
        _log_triplet(sp, 'Base Train to Base Test',
                     (u_sp_base_to_x_te, o_sp_base_to_x_te,
                      c_sp_base_to_x_te))
        for i, j in enumerate(xrange(1, 10)):
            _log_triplet(sp, 'Base Train to Novelty {0}'.format(j),
                         (u_sp[i], o_sp[i], c_sp[i]))

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance: +1 marks an accepted sample and
        # -1 a rejected one
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / \
            float(ntest) * 100
        svm_y_te = np.array([
            len(np.where(clf.predict(outlier) == -1)[0]) / float(ntest) * 100
            for outlier in outliers])

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = np.zeros(9)
        for i, x in enumerate(sp_x_te):
            xt = np.vstack((sp_base_result, x))
            if metrics.compute_overlap(xt) >= clf_th:
                clf_x_te += 1

            for j, yi in enumerate(sp_y_te):
                yt = np.vstack((sp_base_result, yi[i]))
                if metrics.compute_overlap(yt) < clf_th:
                    clf_y_te[j] += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors. The novelty results are stored per
        # class and per trial; assigning to index nt of the list itself would
        # replace a whole per-class array and fail once ntrials exceeds 9.
        sp_x_results[nt] = 100 - clf_x_te
        svm_x_results[nt] = 100 - svm_x_te
        for j in xrange(9):
            sp_y_results[j][nt] = 100 - clf_y_te[j]
            svm_y_results[j][nt] = 100 - svm_y_te[j]

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        for i, j in enumerate(xrange(1, 10)):
            sp._log_stats('SP % Correct Novelty Class {0}'.format(j),
                          clf_y_te[i])
            sp._log_stats('SVM % Correct Novelty Class {0}'.format(j),
                          svm_y_te[i])
        sp._log_stats('SP % Mean Correct Novelty Class', np.mean(clf_y_te))
        sp._log_stats('SVM % Mean Correct Novelty Class', np.mean(svm_y_te))
        sp._log_stats('SP % Adjusted Score',
                      (np.mean(clf_y_te) * clf_x_te) / 100)
        sp._log_stats('SVM % Adjusted Score',
                      (np.mean(svm_y_te) * svm_x_te) / 100)

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
def main(log_dir, ntrain=800, ntest=200, niter=10, nsplits=5,
         global_inhibition=True, seed=None):
    """
    Build the information needed to perform CV on a subset of the MNIST
    dataset.

    @param log_dir: The directory to store the results in.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param niter: The number of parameter iterations to use.

    @param nsplits: The number of splits of the data to use.

    @param global_inhibition: If True use global inhibition; otherwise, use
    local inhibition.

    @param seed: The seed for the random number generators.

    @return: The full set of X, the full set of Y, the keyword arguments for
    the classifier, the params for CV, and the CV.
    """

    # Load MNIST, merge both sets and create the CV splits
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Static parameters, shared by every parameter iteration
    ninputs = tr_x.shape[1]
    kargs = {
        # Region parameters
        'ninputs': ninputs,
        'global_inhibition': global_inhibition,
        'trim': 1e-4,
        'seed': seed,

        # Synapse parameters
        'syn_th': 0.5,
        'random_permanence': True,

        # Fitting parameters
        'nepochs': 30,
        # NOTE: The SVM's will be identical, despite being seeded now
        'clf': LinearSVC(random_state=seed)
    }

    # Distributions the searched parameters are drawn from
    param_distributions = {
        # Region parameters
        'ncolumns': randint(100, 1001),
        # As a percentage of the number of columns
        'nactive': uniform(0, 0.2),

        # Column parameters
        'max_boost': randint(1, 21),
        'duty_cycle': randint(10, 1001),

        # Segment parameters
        'nsynapses': randint(1, ninputs + 1),
        # As a percentage of the number of synapses
        'seg_th': uniform(0, 0.1),

        # Synapse parameters
        'pinc': uniform(0.001, 0.1),
        'pdec': uniform(0.001, 0.1),
        'pwindow': uniform(0.001, 0.1),

        # Fitting parameters
        'log_dir': log_dir
    }

    # A single generator instance is shared by all of the searched keys
    gen = ParamGenerator(param_distributions, niter, nsplits, ninputs)
    params = dict.fromkeys(param_distributions, gen)

    return x, y, kargs, params, cv
def main(ntrain=800, ntest=200, nsplits=1, seed=1234567):
    """
    Train an SP on MNIST and plot its reconstruction of one input per digit.

    @param ntrain: The number of training samples to use.

    @param ntest: The number of testing samples to use.

    @param nsplits: The number of splits of the data to use.

    @param seed: The seed for the random number generators.
    """

    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 10,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,

        'disable_boost': True,

        'nsynapses': 392,
        'seg_th': 10,

        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,

        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold
    for tr, te in cv:
        # Create the region and train it
        region = SPRegion(**kargs)
        region.fit(x[tr], y[tr])

        # Fit the base classifier (its score is not used here)
        svm = LinearSVC(random_state=seed)
        svm.fit(x[tr], y[tr])

        # Pick one random training sample for each digit
        inputs = np.zeros((10, ninputs))
        for digit in xrange(10):
            matches = np.where(y[tr] == digit)[0]
            pick = np.random.permutation(matches)[0]
            inputs[digit] = x[tr][pick]

        # Get the SP's predictions for the inputs
        sp_pred = region.predict(inputs)

        # Get the reconstruction in the context of the SP
        sp_inputs = region.reconstruct_input(sp_pred)

        # Make a plot comparing the images
        shape = (28, 28)
        path = os.path.join(region.log_dir, 'input_reconstruction.png')
        plot_compare_images((inputs, sp_pred, sp_inputs), shape,
                            out_path=path)