def __init__(self, inputDimensions, columnDimensions):
    SpatialPooler.__init__(self, inputDimensions, columnDimensions)
f = open('data/characters', 'r')
lines = f.readlines()
f.close()

inputs = []
## read 5 chars
for i in range(5):
    input_vector = ""
    for j in range(10):
        if j == 0:
            continue  ## discard the first line
        input_vector = input_vector + lines[(i * 10) + j][1:]
    input_vector = list(input_vector)
    ## filter some chars
    input_vector = [0 if item == ' ' or item == '\n' else item for item in input_vector]
    ## print(input_vector)
    inputs.append(input_vector)

# print(len(inputs[1]))
# print(len(inputs[1][1]))

sp = SpatialPooler()
for i in inputs:
    print(i)
    sp.feed(i)
print(len(sp.quantization_centers()))
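# Hedged sketch (not part of the original demo): the parsing loop above implies
# that 'data/characters' stores one glyph per 10 lines, that the first line of
# each block is discarded along with the first column of every remaining line,
# and that blanks/newlines become 0 while every other character is kept as-is.
# If a strictly binary vector is wanted, the mixed 0/character list could be
# flattened like this (helper name is hypothetical):
def to_binary_vector(input_vector):
    # Entries equal to 0 came from ' ' or '\n'; anything else counts as an "on" bit.
    return [0 if item == 0 else 1 for item in input_vector]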
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=float, default=1,
        help='Number of times to run through the training data.')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist()

    if False:
        # Experiment to verify that input dimensions are handled correctly.
        # If you enable this, don't forget to rescale the radii as well as the
        # input.
        from scipy.ndimage import zoom
        new_sz = (1, 4, 1)
        train_images = [zoom(im, new_sz, order=0) for im in train_images]
        test_images  = [zoom(im, new_sz, order=0) for im in test_images]

    training_data = list(zip(train_images, train_labels))
    test_data     = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)
    if args.debug and args.time < 1:
        test_data = test_data[:int(len(test_data) * args.time)]

    # Setup spatial pooler machine.
    enc = BWImageEncoder(train_images[0].shape[:2])
    sp = SpatialPooler(input_sdr=enc.output, segments=1, **parameters)
    sdrc = SDRClassifier(steps=[0])
    if verbose:
        print(sp.statistics())

    # Training Loop
    train_cycles = len(train_images) * args.time
    if verbose:
        print("Training for %d cycles" % train_cycles)
    for i in range(int(round(train_cycles))):
        sp.reset()
        img, lbl = random.choice(training_data)
        img = synthesize(img, diag=False)
        enc.encode(np.squeeze(img))
        sp.compute()
        sdrc.compute(i, sp.columns.flat_index,
            classification={"bucketIdx": lbl, "actValue": lbl},
            learn=True, infer=False)

    if verbose:
        print("Done training.")
        print("")
        print("Removing zero permanence synapses.")
        sp.synapses.remove_zero_permanence_synapses()
        print(sp.statistics())

    # Testing Loop
    if verbose:
        print("Testing for %d cycles." % len(test_data))
    score = 0
    for img, lbl in test_data:
        enc.encode(np.squeeze(img))
        sp.compute(learn=False)
        try:
            inference = sdrc.infer(sp.columns.flat_index, None)[0]
        except IndexError:
            inference = np.zeros(10)
        if lbl == np.argmax(inference):
            score += 1
    print('Score:', 100 * score / len(test_data), '%')

    if synapses_debug:
        sp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score / len(test_data)
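# Hedged helper (not in the original file): a convenience wrapper for a quick,
# low-cost run of the MNIST experiment above.  The wrapper name is hypothetical;
# it only forwards flags that the parser above actually defines (--time, --debug).
def quick_smoke_test(fraction=0.25):
    # Train on a fraction of an epoch; --debug with --time < 1 also shrinks the test set.
    return main(default_parameters,
                argv=['--time', str(fraction), '--debug'],
                verbose=True)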
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=5,
        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary'), default='states')
    args = parser.parse_args(args=argv)

    # Load data.
    if args.dataset == 'states':
        dataset = state_names
        if verbose:
            print("Dataset is %d state names"%len(dataset))
    elif args.dataset == 'dictionary':
        dataset = read_dictionary()
        dataset = random.sample(dataset, 500)
        if verbose:
            print("Dataset is dictionary words, sample size %d"%len(dataset))
    dataset = sorted(dataset)
    word_ids = {word: idx for idx, word in enumerate(sorted(dataset))}
    confusion = np.zeros((len(dataset), len(dataset)))
    if verbose:
        print("Dataset: " + ", ".join('%d) %s'%idx_word for idx_word in enumerate(dataset)))

    # Construct TM.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha=diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr=enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr=sp.columns,
        anomaly_alpha=diagnostics_alpha,
        **parameters['tm'])
    sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
    sdrc.compute(-1, [tm.active.size - 1],  # Initialize the table.
        classification={"bucketIdx": [len(dataset) - 1], "actValue": [len(dataset) - 1]},
        learn=True, infer=False)

    def reset():
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()

    # Train.
    if verbose:
        train_cycles = args.time * sum(len(w) for w in dataset)
        print("Training for %d cycles (%d dataset iterations)"%(train_cycles, args.time))
    for i in range(args.time):
        random.shuffle(dataset)
        for word in dataset:
            reset()
            for idx, char in enumerate(word):
                enc.encode(char)
                sp.compute()
                tm.compute()
            lbl = word_ids[word]
            sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": lbl, "actValue": lbl},
                learn=True, infer=False)

    if verbose:
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())

    # Test.
    score = 0.
    score_samples = 0
    for word in dataset:
        reset()
        for idx, char in enumerate(word):
            enc.encode(char)
            sp.compute(learn=False)
            tm.compute(learn=False)
        inference = sdrc.infer(tm.active.flat_index, None)
        lbl = word_ids[word]
        if lbl == np.argmax(inference[0]):
            score += 1
        score_samples += 1
        confusion[lbl] += inference[0]
    print("Score:", 100. * score / score_samples, '%')

    if synapses_debug:
        tm.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    if verbose:
        import matplotlib.pyplot as plt
        plt.figure('Confusion Matrix')
        plt.imshow(confusion, interpolation='nearest')
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.show()

    return score / score_samples
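# Hedged helper (not in the original file): run the word-sequence experiment
# above on the small state-names dataset for a given number of passes.  The
# wrapper name is hypothetical; '--dataset' and '-t' match the arguments
# defined by the parser above.
def run_state_names(passes=5):
    return main(default_parameters,
                argv=['--dataset', 'states', '-t', str(passes)],
                verbose=True)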
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=20,
        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary', 'gutenberg'), default='states')
    parser.add_argument('--words', type=int, default=500,
        help='Number of words to use.')
    parser.add_argument('--typo', type=float, default=0.,
        help='Misspell words, percentage [0-1], default 0.')
    parser.add_argument('--practice', type=int, default=0,
        help='Makes the task easier by repeating words.')
    parser.add_argument('--learned_stability', action='store_true',
        help='Disable the stability mechanism during tests.')
    parser.add_argument('--disable_tm_sdrc', action='store_true')
    args = parser.parse_args(args=argv)

    assert(parameters['tp_nz_value'] > 0)

    if verbose:
        print("Parameters = ", end='')
        import pprint
        pprint.pprint(parameters)
        print("")

    # Load the dataset.  The dataset consists of three variables:
    # 1) training_data is a list of words.
    # 2) testing_data is a list of words.
    # 3) dataset is a dictionary of word -> identifier pairs.
    if args.dataset == 'states':
        # Remove the spaces from two-word state names.
        dataset = [word.replace(' ', '') for word in state_names]
        training_data = dataset * args.time
        testing_data = dataset * 5
        random.shuffle(training_data)
        random.shuffle(testing_data)
        if verbose:
            print("Dataset is %d state names."%len(dataset))
    elif args.dataset == 'dictionary':
        dataset = read_dictionary()
        dataset = random.sample(dataset, args.words)
        training_data = dataset * args.time
        testing_data = dataset * 5
        random.shuffle(training_data)
        random.shuffle(testing_data)
        if verbose:
            print("Dataset is %d dictionary words."%len(dataset))
    elif args.dataset == 'gutenberg':
        text = read_gutenberg(args.time)
        split = int(.80 * len(text))  # Fraction of data to train on.
        training_data = text[:split]
        testing_data = text[split:]
        # Put the most common words into the dataset to be trained & tested on.
        histogram = {}
        for word in training_data:
            if word not in histogram:
                histogram[word] = 0
            histogram[word] += 1
        histogram.pop('S', None)  # Remove the apostrophe 'S'.
        dataset = sorted(histogram, key=lambda word: histogram[word])
        dataset = dataset[-args.words:]
        if verbose:
            print("Dataset is %d words from Project Gutenberg."%len(dataset))
            unique_train = len(set(training_data))
            unique_test = len(set(testing_data))
            print("Unique words in training data %d, testing data %d"%(unique_train, unique_test))
    dataset = {word: idx for idx, word in enumerate(sorted(set(dataset)))}
    if verbose:
        print("Training data %d words, %g%% dataset coverage."%(
            len(training_data),
            100. * sum(1 for w in training_data if w in dataset) / len(dataset)))
        print("Testing data %d words, %g%% dataset coverage."%(
            len(testing_data),
            100. * sum(1 for w in testing_data if w in dataset) / len(dataset)))
        print("Dataset: " + ", ".join('%d) %s'%(dataset[word], word) for word in sorted(dataset)))

    if args.practice:
        insertion_point = int(len(training_data) / 2)
        practice_dataset = list(dataset)
        random.shuffle(practice_dataset)
        for word in practice_dataset:
            for attempt in range(args.practice):
                training_data.insert(insertion_point, word)

    # Construct TM.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha=diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr=enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr=sp.columns,
        context_sdr=SDR((parameters['tp']['mini_columns'],)),
        anomaly_alpha=diagnostics_alpha,
        **parameters['tm'])
    if not args.disable_tm_sdrc:
        tm_sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
        tm_sdrc.compute(-1, [tm.active.size - 1],  # Initialize the SDRC's internal table.
            classification={"bucketIdx": [len(dataset) - 1], "actValue": [len(dataset) - 1]},
            learn=True, infer=False)
    tp = StableSpatialPooler(
        input_sdr=tm.active,
        macro_columns=(1,),
        **parameters['tp'])
    tp_sdrc = SDRClassifier(steps=[0], **parameters['tp_sdrc'])
    tp_sdrc.compute(-1, [tp.columns.size - 1],  # Initialize the SDRC's internal table.
        classification={"bucketIdx": [len(dataset) - 1], "actValue": [len(dataset) - 1]},
        learn=True, infer=False)

    def reset():
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()
        tp.reset()

    def compute(char, learn):
        enc.encode(char)
        sp.compute(learn=learn)
        tm.context_sdr.flat_index = tp.columns.flat_index
        tm.context_sdr.nz_values.fill(parameters['tp_nz_value'])
        tm.compute(learn=learn)
        tp.compute(learn=learn, input_learning_sdr=tm.learning)

    # TRAIN
    if verbose:
        train_cycles = sum(len(w) for w in training_data)
        iterations = len(training_data) / len(dataset)
        print("Training for %d cycles (%d dataset iterations)"%(train_cycles, iterations))
    reset()
    for word in training_data:
        for idx, char in enumerate(word):
            compute(char, learn=True)
        # Process each word before training on the final character.
        try:
            label = dataset[word]
        except KeyError:
            continue
        if len(tm.learning) and not args.disable_tm_sdrc:
            tm_sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": label, "actValue": label},
                learn=True, infer=False)
        if len(tp.columns):
            tp_sdrc.compute(tp.age, tp.columns.flat_index,
                classification={"bucketIdx": label, "actValue": label},
                learn=True, infer=False)

    if verbose:
        print("Done training.  System statistics:")
        print("")
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())
        print(tp.statistics())
        print("")

    # TEST
    # Make some new words which the system has never seen before.
    if verbose:
        random_words = []
        for word in dataset:
            alphabet = [chr(ord('A') + i) for i in range(26)]
            random_word = ''.join(random.choice(alphabet) for c in word)
            random_words.append(random_word)
        print("Novel Words Dataset: " + ', '.join(random_words))
        print("")

        # Measure response to new random words.
        rand_word_tp_ovlp = 0.
        n_samples = 0
        for word in random_words:
            reset()
            response = []
            for char in word:
                compute(char, learn=False)
                response.append(SDR(tp.columns))
            for sdr_a, sdr_b in itertools.combinations(response, 2):
                rand_word_tp_ovlp += sdr_a.overlap(sdr_b)
                n_samples += 1
        rand_word_tp_ovlp /= n_samples
        print("Novel Words (Isolated), Average Overlap Within Word %g %%"%(100 * rand_word_tp_ovlp))

        # Measure response to new random words, with the stability mechanism
        # turned off.
        stability_rate = tp.stability_rate
        tp.stability_rate = 1.
        rand_word_tp_ovlp_no_stab = 0.
        for word in random_words:
            reset()
            response = []
            for char in word:
                compute(char, learn=False)
                response.append(SDR(tp.columns))
            for sdr_a, sdr_b in itertools.combinations(response, 2):
                rand_word_tp_ovlp_no_stab += sdr_a.overlap(sdr_b)
        rand_word_tp_ovlp_no_stab /= n_samples
        tp.stability_rate = stability_rate
        print("Novel Words (Isolated), No Stability Mechanism, Avg Ovlp Within Word %g %%"%(100 * rand_word_tp_ovlp_no_stab))

        # Compare new word response to that of randomly generated SDRs.
        rand_sdr_ovlp = 0.
        tp_n_active = len(tp.columns)
        for i in range(n_samples):
            sdr_a = SDR(tp.columns)
            sdr_b = SDR(tp.columns)
            sdr_a.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active))
            sdr_b.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active))
            rand_sdr_ovlp += sdr_a.overlap(sdr_b)
        rand_sdr_ovlp /= n_samples
        print("Random Comparable SDR(n=%d sparsity=%g%%), Average Overlap %g %%"%(
            tp.columns.size,
            100 * tp_n_active / tp.columns.size,
            100 * rand_sdr_ovlp))
        print("")

    if args.learned_stability:
        tp.stability_rate = 1
        if verbose:
            print("")
            print("Disabled Stability Mechanism...")
            print("")

    # Measure response to each word in isolation.
    if verbose:
        catagories = {word: [] for word in dataset}
        tm_accuacy = 0.
        tp_accuacy = 0.
        n_samples = 0
        for word, word_id in dataset.items():
            reset()
            for char in word:
                compute(char, learn=False)
                catagories[word].append(SDR(tp.columns))
            if not args.disable_tm_sdrc:
                try:
                    tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0]
                except IndexError:
                    tm_inference = np.random.random(size=len(dataset))
                tm_accuacy += word_id == np.argmax(tm_inference)
            try:
                tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0]
            except IndexError:
                tp_inference = np.random.random(size=len(dataset))
            tp_accuacy += word_id == np.argmax(tp_inference)
            n_samples += 1
        tm_accuacy /= n_samples
        tp_accuacy /= n_samples
        print("")
        print("Isolated Word Stability / Distinctiveness:")
        stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose)
        print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples))
        print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples))
        print("")

    # Measure response to words in context.  Measure the overlap between the
    # same words in different contexts.  Also check the classifier accuracy.
    catagories = {word: [] for word in dataset}
    tm_accuacy = 0.
    tp_accuacy = 0.
    tm_confusion = np.zeros((len(dataset), len(dataset)))
    tp_confusion = np.zeros((len(dataset), len(dataset)))
    n_samples = 0
    reset()
    for word in testing_data:
        if random.random() < args.typo:
            mutated_word = mutate_word(word)
        else:
            mutated_word = word
        for char in mutated_word:
            compute(char, learn=False)
        if word in catagories:
            catagories[word].append(SDR(tp.columns))
        # Check Classifier Accuracy.
        try:
            word_id = dataset[word]
        except KeyError:
            continue
        if not args.disable_tm_sdrc:
            try:
                tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0]
            except IndexError:
                tm_inference = np.random.random(size=len(dataset))
            tm_accuacy += word_id == np.argmax(tm_inference)
            tm_confusion[word_id] += tm_inference / np.sum(tm_inference)
        try:
            tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0]
        except IndexError:
            tp_inference = np.random.random(size=len(dataset))
        tp_accuacy += word_id == np.argmax(tp_inference)
        tp_confusion[word_id] += tp_inference / np.sum(tp_inference)
        n_samples += 1
    tm_accuacy /= n_samples
    tp_accuacy /= n_samples
    if verbose:
        print("")
        print("In-Context Word Stability / Distinctiveness:")
    stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose)
    if verbose:
        print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples))
        print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples))

    score = (stability * tm_accuacy * tp_accuacy)
    if verbose:
        print("Score: %g"%score)

    # Display Confusion Matrices
    if verbose:
        conf_matrices = (tm_confusion, tp_confusion,)
        conf_titles = ('Temporal Memory', 'Temporal Pooler',)
        #import matplotlib.pyplot as plt
        plt.figure("Word Recognition Confusion")
        for subplot_idx, matrix_title in enumerate(zip(conf_matrices, conf_titles)):
            matrix, title = matrix_title
            plt.subplot(1, len(conf_matrices), subplot_idx + 1)
            plt.title(title + " Confusion")
            matrix /= np.sum(matrix, axis=0)
            plt.imshow(matrix, interpolation='nearest')
            plt.xlabel('Prediction')
            plt.ylabel('Label')
            for label, idx in dataset.items():
                plt.text(idx, len(dataset) + .5, label,
                    rotation='vertical',
                    horizontalalignment='center', verticalalignment='bottom')
                plt.text(-1.5, idx, label,
                    horizontalalignment='left', verticalalignment='center')

    # Show a sample of input.
    if verbose:
        sentance = []
        boundries = []
        anomaly_hist = []
        stability_hist = []
        tp_active_hist = []
        tp_class_hist = []
        tp_prev_active = SDR(tp.columns.dimensions)
        n_samples = 0
        sample_data = testing_data[:100]
        reset()
        for word in sample_data:
            if random.random() < args.typo:
                mutated_word = mutate_word(word)
            else:
                mutated_word = word
            for index, char in enumerate(mutated_word):
                compute(char, learn=False)
                try:
                    tp_inference = np.argmax(tp_sdrc.infer(tp.columns.flat_index, None)[0])
                except IndexError:
                    tp_inference = random.choice(range(len(dataset)))
                tp_class_hist.append(tp_inference)
                if index == 0:
                    boundries.append(n_samples)
                sentance.append(char)
                anomaly_hist.append(tm.anomaly)
                tp_active_hist.append(SDR(tp.columns))
                stability_hist.append(tp.columns.overlap(tp_prev_active))
                tp_prev_active = SDR(tp.columns)
                n_samples += 1

        plt.figure("ASCII Stability")
        stability_weighted = overlap_stability_weighted(tp_active_hist)
        plt.plot(
            # np.arange(n_samples) + .5, anomaly_hist, 'ro',
            # np.arange(n_samples) + .5, stability_hist, 'b-',
            np.arange(n_samples) + .5, stability_weighted, 'b-',)
        for idx, char in enumerate(sentance):
            plt.text(idx + .5, .01, char, horizontalalignment='center')
        for x in boundries:
            plt.axvline(x, color='k')
        sorted_dataset = sorted(dataset)
        for idx, word_id in enumerate(tp_class_hist):
            word = sorted_dataset[word_id]
            plt.text(idx + .5, 1., word,
                rotation=90,
                horizontalalignment='center', verticalalignment='top')
        figure_title = "Output Layer Stability"
        if args.learned_stability:
            figure_title += " - Stability Mechanism Disabled."
        figure_title += "\nInput character at bottom, Classification at top, Vertical lines are word boundaries."
        plt.title(figure_title)
        plt.ylabel('Stability')
        plt.xlabel('Time step')
        plt.show()

    if synapses_debug:
        sp.synapses.check_data_integrity()
        tm.synapses.check_data_integrity()
        tp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score
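# Hedged entry-point sketch (not in the original excerpt): when this experiment
# is executed as a script, run it with its module-level default parameters and
# the default command-line arguments.
if __name__ == '__main__':
    main(default_parameters, argv=None, verbose=True)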