def __init__(self, inputDimensions, columnDimensions):
    SpatialPooler.__init__(self, inputDimensions, columnDimensions)
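# A self-contained sketch of how the override above might be used.  The
# surrounding class declaration is not shown in this snippet, and the parent
# is assumed to be (or be compatible with) the NuPIC SpatialPooler; the class
# name DemoSpatialPooler and the dimensions below are hypothetical.
from nupic.algorithms.spatial_pooler import SpatialPooler

class DemoSpatialPooler(SpatialPooler):
    def __init__(self, inputDimensions, columnDimensions):
        # Forward the dimensions; every other SP parameter keeps its default.
        SpatialPooler.__init__(self, inputDimensions, columnDimensions)

pooler = DemoSpatialPooler(inputDimensions=(32, 32), columnDimensions=(64, 64))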
Example No. 2
with open('data/characters', 'r') as f:
    lines = f.readlines()

inputs = []

# Read 5 characters; each one occupies a 10-line block in the file.
for i in range(5):
    input_vector = ""
    for j in range(10):
        if j == 0:
            continue  # Discard the first line of each block.
        input_vector = input_vector + lines[(i * 10) + j][1:]  # Skip the first column.

    input_vector = list(input_vector)

    # Filter some characters: blanks and newlines become 0.
    input_vector = [0 if item == ' ' or item == '\n'
                    else item for item in input_vector]
    # print(input_vector)
    inputs.append(input_vector)

# print(len(inputs[1]))
# print(len(inputs[1][1]))

sp = SpatialPooler()
for i in inputs:
    print(i)
    sp.feed(i)
    print(len(sp.quantization_centers()))
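# The SpatialPooler used above exposes feed() and quantization_centers(),
# which is not the NuPIC interface; its implementation is not shown in this
# listing.  The toy stand-in below only mimics that interface so the snippet
# can be exercised end to end (define it, or the real class, before running
# the loop above): it records each novel input vector as a quantization
# center, which is an assumption, not the real algorithm.
class SpatialPooler(object):
    def __init__(self):
        self._centers = []

    def feed(self, input_vector):
        # Treat every previously unseen vector as a new quantization center.
        if input_vector not in self._centers:
            self._centers.append(list(input_vector))

    def quantization_centers(self):
        return self._centers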
Example No. 3
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-t',
        '--time',
        type=float,
        default=1,
        help='Number of times to run through the training data.')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist()

    if False:
        # Experiment to verify that input dimensions are handled correctly.
        # If you enable this, don't forget to rescale the radii as well as
        # the input.
        from scipy.ndimage import zoom
        new_sz = (1, 4, 1)
        train_images = [zoom(im, new_sz, order=0) for im in train_images]
        test_images = [zoom(im, new_sz, order=0) for im in test_images]

    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)
    if args.debug and args.time < 1:
        test_data = test_data[:int(len(test_data) * args.time)]
    # Setup spatial pooler machine.
    enc = BWImageEncoder(train_images[0].shape[:2])
    sp = SpatialPooler(input_sdr=enc.output, segments=1, **parameters)
    sdrc = SDRClassifier(steps=[0])

    if verbose:
        print(sp.statistics())

    # Training Loop
    train_cycles = len(train_images) * args.time
    if verbose:
        print("Training for %d cycles" % train_cycles)
    for i in range(int(round(train_cycles))):
        sp.reset()
        img, lbl = random.choice(training_data)
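        # synthesize() is a helper from this repository (not shown here); it
        # is assumed to apply a random distortion to the training image, with
        # diag=False suppressing its diagnostic output.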
        img = synthesize(img, diag=False)
        enc.encode(np.squeeze(img))
        sp.compute()
        sdrc.compute(i,
                     sp.columns.flat_index,
                     classification={
                         "bucketIdx": lbl,
                         "actValue": lbl
                     },
                     learn=True,
                     infer=False)

    if verbose:
        print("Done training.")
        print("")
        print("Removing zero permanence synapses.")
        sp.synapses.remove_zero_permanence_synapses()
        print(sp.statistics())

    # Testing Loop
    if verbose:
        print("Testing for %d cycles." % len(test_data))
    score = 0
    for img, lbl in test_data:
        enc.encode(np.squeeze(img))
        sp.compute(learn=False)
        try:
            inference = sdrc.infer(sp.columns.flat_index, None)[0]
        except IndexError:
            inference = np.zeros(10)
        if lbl == np.argmax(inference):
            score += 1

    print('Score:', 100 * score / len(test_data), '%')

    if synapses_debug:
        sp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score / len(test_data)
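# load_mnist(), BWImageEncoder, and synthesize() are helpers defined elsewhere
# in this repository.  A rough substitute for load_mnist() is sketched below
# using scikit-learn; the (28, 28, 1) boolean image shape and the 60000/10000
# train/test split are assumptions chosen to match how the images are used
# above (np.squeeze and a black & white image encoder).
import numpy as np
from sklearn.datasets import fetch_openml

def load_mnist():
    mnist  = fetch_openml('mnist_784', version=1, as_frame=False)
    images = mnist.data.reshape(-1, 28, 28, 1) > 128   # Threshold to black & white.
    labels = mnist.target.astype(int)
    return labels[:60000], images[:60000], labels[60000:], images[60000:]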
Example No. 4
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=5,
                        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary'), default='states')
    args = parser.parse_args(args = argv)

    # Load data.
    if args.dataset == 'states':
        dataset = state_names
        if verbose:
            print("Dataset is %d state names"%len(dataset))
    elif args.dataset == 'dictionary':
        dataset = read_dictionary()
        dataset = random.sample(dataset, 500)
        if verbose:
            print("Dataset is dictionary words, sample size %d"%len(dataset))

    dataset   = sorted(dataset)
    word_ids  = {word: idx for idx, word in enumerate(dataset)}
    confusion = np.zeros((len(dataset), len(dataset)))
    if verbose:
        print("Dataset: " + ", ".join('%d) %s'%idx_word for idx_word in enumerate(dataset)))

    # Construct TM.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha = diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr         = enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr        = sp.columns,
        anomaly_alpha     = diagnostics_alpha,
        **parameters['tm'])
    sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
    sdrc.compute(-1, [tm.active.size-1],    # Initialize the table.
        classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]},
        learn=True, infer=False)

    def reset():
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()

    # Train.
    if verbose:
        train_cycles = args.time * sum(len(w) for w in dataset)
        print("Training for %d cycles (%d dataset iterations)"%(train_cycles, args.time))
    for i in range(args.time):
        random.shuffle(dataset)
        for word in dataset:
            reset()
            for idx, char in enumerate(word):
                enc.encode(char)
                sp.compute()
                tm.compute()
            lbl = word_ids[word]
            sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": lbl, "actValue": lbl},
                learn=True, infer=False)

    if verbose:
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())

    # Test.
    score = 0.
    score_samples = 0
    for word in dataset:
        reset()
        for idx, char in enumerate(word):
            enc.encode(char)
            sp.compute(learn = False)
            tm.compute(learn = False)

        inference = sdrc.infer(tm.active.flat_index, None)
        lbl = word_ids[word]
        if lbl == np.argmax(inference[0]):
            score += 1
        score_samples += 1
        confusion[lbl] += inference[0]
    print("Score:", 100. * score / score_samples, '%')

    if synapses_debug:
        tm.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    if verbose:
        import matplotlib.pyplot as plt
        plt.figure('Confusion Matrix')
        plt.imshow(confusion, interpolation='nearest')
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.show()

    return score / score_samples
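# read_dictionary() and state_names are helpers defined elsewhere in this
# repository.  A plausible stand-in for read_dictionary() is sketched below;
# the word-list path and the upper-casing are assumptions (the examples here
# work with the characters A-Z).
def read_dictionary(path='/usr/share/dict/words'):
    with open(path) as f:
        words = [line.strip().upper() for line in f]
    # Keep purely alphabetic words so that every character can be encoded.
    return [word for word in words if word.isalpha()]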
Example No. 5
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=20,
                        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary', 'gutenberg'),
        default='states')
    parser.add_argument('--words', type=int, default=500,
        help='Number of words to use.')
    parser.add_argument('--typo', type=float, default=0.,
        help='Misspell words, percentage [0-1], default 0.')
    parser.add_argument('--practice', type=int, default=0,
        help='Makes the task easier by repeating words.')
    parser.add_argument('--learned_stability', action='store_true',
        help='Disable the stability mechanism during tests.')
    parser.add_argument('--disable_tm_sdrc', action='store_true',
        help='Do not train or use the temporal memory classifier.')
    args = parser.parse_args(args = argv)

    assert(parameters['tp_nz_value'] > 0)

    if verbose:
        print("Parameters = ", end='')
        import pprint
        pprint.pprint(parameters)
        print("")

    # Load dataset.  The dataset consists of three variables:
    # 1) training_data is a list of words.
    # 2) testing_data is a list of words.
    # 3) dataset is a dictionary of word -> identifier pairs.
    if args.dataset == 'states':
        # Remove the spaces from the two-word state names.
        dataset       = [word.replace(' ', '') for word in state_names]
        training_data = dataset * args.time
        testing_data  = dataset * 5
        random.shuffle(training_data)
        random.shuffle(testing_data)
        if verbose:
            print("Dataset is %d state names."%len(dataset))
    elif args.dataset == 'dictionary':
        dataset       = read_dictionary()
        dataset       = random.sample(dataset, args.words)
        training_data = dataset * args.time
        testing_data  = dataset * 5
        random.shuffle(training_data)
        random.shuffle(testing_data)
        if verbose:
            print("Dataset is %d dictionary words."%len(dataset))
    elif args.dataset == 'gutenberg':
        text          = read_gutenberg(args.time)
        split         = int(.80 * len(text))    # Fraction of data to train on.
        training_data = text[ : split]
        testing_data  = text[split : ]
        # Put the most common words into the dataset to be trained & tested on.
        histogram     = {}
        for word in training_data:
            if word not in histogram:
                histogram[word] = 0
            histogram[word] += 1
        histogram.pop('S', None)    # Remove the 'S' left over from apostrophes.
        dataset = sorted(histogram, key = lambda word: histogram[word])
        dataset = dataset[ -args.words : ]
        if verbose:
            print("Dataset is %d words from Project Gutenberg."%len(dataset))
            unique_train = len(set(training_data))
            unique_test  = len(set(testing_data))
            print("Unique words in training data %d, testing data %d"%(unique_train, unique_test))

    dataset = {word: idx for idx, word in enumerate(sorted(set(dataset)))}
    if verbose:
        print("Training data %d words, %g%% dataset coverage."%(
            len(training_data),
            100. * sum(1 for w in training_data if w in dataset) / len(dataset)))
        print("Testing data %d words, %g%% dataset coverage."%(
            len(testing_data),
            100. * sum(1 for w in testing_data if w in dataset) / len(dataset)))
        print("Dataset: " + ", ".join('%d) %s'%(dataset[word], word) for word in sorted(dataset)))

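    # Optionally make the task easier: insert args.practice extra repetitions
    # of every dataset word halfway through the training data.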
    if args.practice:
        insertion_point  = int(len(training_data) / 2)
        practice_dataset = list(dataset)
        random.shuffle(practice_dataset)
        for word in practice_dataset:
            for attempt in range(args.practice):
                training_data.insert(insertion_point, word)

    # Construct TM.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha = diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr         = enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr        = sp.columns,
        context_sdr       = SDR((parameters['tp']['mini_columns'],)),
        anomaly_alpha     = diagnostics_alpha,
        **parameters['tm'])
    if not args.disable_tm_sdrc:
        tm_sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
        tm_sdrc.compute(-1, [tm.active.size-1],    # Initialize the SDRC's internal table.
            classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]},
            learn=True, infer=False)
    tp = StableSpatialPooler(
        input_sdr         = tm.active,
        macro_columns     = (1,),
        **parameters['tp'])
    tp_sdrc = SDRClassifier(steps=[0], **parameters['tp_sdrc'])
    tp_sdrc.compute(-1, [tp.columns.size-1],    # Initialize the SDRC's internal table.
        classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]},
        learn=True, infer=False)

    def reset():
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()
        tp.reset()

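    # Run one character through the hierarchy.  Note that the output layer's
    # (tp) previous activity is copied into the temporal memory's context_sdr,
    # so the stable pooled representation feeds back into sequence memory.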
    def compute(char, learn):
        enc.encode(char)
        sp.compute(learn=learn)
        tm.context_sdr.flat_index = tp.columns.flat_index
        tm.context_sdr.nz_values.fill(parameters['tp_nz_value'])
        tm.compute(learn=learn)
        tp.compute(learn=learn,
            input_learning_sdr = tm.learning,)

    # TRAIN
    if verbose:
        train_cycles = sum(len(w) for w in training_data)
        iterations   = len(training_data) / len(dataset)
        print("Training for %d cycles (%d dataset iterations)"%(train_cycles, iterations))

    reset()
    for word in training_data:
        for idx, char in enumerate(word):
            compute(char, learn=True)
        # After processing the whole word, train the classifiers on its final
        # state (skip words that are not in the dataset).
        try:
            label = dataset[word]
        except KeyError:
            continue
        if len(tm.learning) and not args.disable_tm_sdrc:
            tm_sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": label, "actValue": label},
                learn=True, infer=False)
        if len(tp.columns):
            tp_sdrc.compute(tp.age, tp.columns.flat_index,
                classification={"bucketIdx": label, "actValue": label},
                learn=True, infer=False)

    if verbose:
        print("Done training.  System statistics:")
        print("")
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())
        print(tp.statistics())
        print("")

    # TEST
    # Make some new words which the system has never seen before.
    if verbose:
        random_words = []
        for word in dataset:
            alphabet    = [chr(ord('A') + i) for i in range(26)]
            random_word = ''.join(random.choice(alphabet) for c in word)
            random_words.append(random_word)
        print("Novel Words Dataset: " + ', '.join(random_words))
        print("")

        # Measure response to new random words.
        rand_word_tp_ovlp = 0.
        n_samples         = 0
        for word in random_words:
            reset()
            response = []
            for char in word:
                compute(char, learn = False)
                response.append(SDR(tp.columns))
            for sdr_a, sdr_b in itertools.combinations(response, 2):
                rand_word_tp_ovlp += sdr_a.overlap(sdr_b)
                n_samples += 1
        rand_word_tp_ovlp /= n_samples
        print("Novel Words (Isolated), Average Overlap Within Word %g %%"%(100 * rand_word_tp_ovlp))

        # Measure response to new random words, with the stability mechanism
        # turned off.
        stability_rate = tp.stability_rate
        tp.stability_rate = 1.
        rand_word_tp_ovlp_no_stab = 0.
        for word in random_words:
            reset()
            response = []
            for char in word:
                compute(char, learn = False)
                response.append(SDR(tp.columns))
            for sdr_a, sdr_b in itertools.combinations(response, 2):
                rand_word_tp_ovlp_no_stab += sdr_a.overlap(sdr_b)
        rand_word_tp_ovlp_no_stab /= n_samples
        tp.stability_rate = stability_rate
        print("Novel Words (Isolated), No Stability Mechanism, Avg Ovlp Within Word %g %%"%(100 * rand_word_tp_ovlp_no_stab))

        # Compare new word response to that of randomly generated SDRs.
        rand_sdr_ovlp = 0.
        tp_n_active   = len(tp.columns)
        for i in range(n_samples):
            sdr_a = SDR(tp.columns)
            sdr_b = SDR(tp.columns)
            sdr_a.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active))
            sdr_b.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active))
            rand_sdr_ovlp += sdr_a.overlap(sdr_b)
        rand_sdr_ovlp /= n_samples
        print("Random Comparable SDR(n=%d sparsity=%g%%), Average Overlap %g %%"%(
            tp.columns.size,
            100 * tp_n_active / tp.columns.size,
            100 * rand_sdr_ovlp),)
        print("")

    if args.learned_stability:
        tp.stability_rate = 1
        if verbose:
            print("")
            print("Disabled Stability Mechanism...")
            print("")

    # Measure response to each word in isolation.
    if verbose:
        catagories   = {word : [] for word in dataset}
        tm_accuacy   = 0.
        tp_accuacy   = 0.
        n_samples    = 0
        for word, word_id in dataset.items():
            reset()
            for char in word:
                compute(char, learn = False)
                catagories[word].append(SDR(tp.columns))
            if not args.disable_tm_sdrc:
                try:
                    tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0]
                except IndexError:
                    tm_inference = np.random.random(size=len(dataset))
                tm_accuacy += word_id == np.argmax(tm_inference)
            try:
                tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0]
            except IndexError:
                tp_inference = np.random.random(size=len(dataset))
            tp_accuacy += word_id == np.argmax(tp_inference)
            n_samples  += 1
        tm_accuacy /= n_samples
        tp_accuacy /= n_samples
        print("")
        print("Isolated Word Stability / Distinctiveness:")
        stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose)
        print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples))
        print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples))
        print("")

    # Measure response to words in context.  Measure the overlap between the
    # same words in different contexts.  Also check the classifier accuracy.
    catagories   = {word : [] for word in dataset}
    tm_accuacy   = 0.
    tp_accuacy   = 0.
    tm_confusion = np.zeros((len(dataset), len(dataset)))
    tp_confusion = np.zeros((len(dataset), len(dataset)))
    n_samples    = 0
    reset()
    for word in testing_data:
        if random.random() < args.typo:
            mutated_word = mutate_word(word)
        else:
            mutated_word = word

        for char in mutated_word:
            compute(char, learn = False)
            if word in catagories:
                catagories[word].append(SDR(tp.columns))

        # Check Classifier Accuracy.
        try:
            word_id = dataset[word]
        except KeyError:
            continue
        if not args.disable_tm_sdrc:
            try:
                tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0]
            except IndexError:
                tm_inference = np.random.random(size=len(dataset))
            tm_accuacy += word_id == np.argmax(tm_inference)
            tm_confusion[word_id] += tm_inference / np.sum(tm_inference)
        try:
            tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0]
        except IndexError:
            tp_inference = np.random.random(size=len(dataset))
        tp_accuacy += word_id == np.argmax(tp_inference)
        tp_confusion[word_id] += tp_inference / np.sum(tp_inference)
        n_samples  += 1
    tm_accuacy /= n_samples
    tp_accuacy /= n_samples
    if verbose:
        print("")
        print("In-Context Word Stability / Distinctiveness:")
    stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose)
    if verbose:
        print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples))
        print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples))

    score = (stability * tm_accuacy * tp_accuacy)
    if verbose:
        print("Score: %g"%score)

    # Display Confusion Matrices
    if verbose:
        conf_matrices = (tm_confusion, tp_confusion,)
        conf_titles   = ('Temporal Memory', 'Temporal Pooler',)
        #
        import matplotlib.pyplot as plt
        plt.figure("Word Recognition Confusion")
        for subplot_idx, (matrix, title) in enumerate(zip(conf_matrices, conf_titles)):
            plt.subplot(1, len(conf_matrices), subplot_idx + 1)
            plt.title(title + " Confusion")
            matrix /= np.sum(matrix, axis=0)
            plt.imshow(matrix, interpolation='nearest')
            plt.xlabel('Prediction')
            plt.ylabel('Label')
            for label, idx in dataset.items():
                plt.text(idx, len(dataset) + .5, label, rotation='vertical',
                    horizontalalignment='center', verticalalignment='bottom')
                plt.text(-1.5, idx, label,
                    horizontalalignment='left', verticalalignment='center')

    # Show a sample of input.
    if verbose:
        sentence        = []
        boundaries      = []
        anomaly_hist    = []
        stability_hist  = []
        tp_active_hist  = []
        tp_class_hist   = []
        tp_prev_active  = SDR(tp.columns.dimensions)
        n_samples       = 0
        sample_data     = testing_data[ : 100]
        reset()
        for word in sample_data:
            if random.random() < args.typo:
                mutated_word = mutate_word(word)
            else:
                mutated_word = word

            for index, char in enumerate(mutated_word):
                compute(char, learn = False)
                try:
                    tp_inference = np.argmax(tp_sdrc.infer(tp.columns.flat_index, None)[0])
                except IndexError:
                    tp_inference = random.choice(range(len(dataset)))
                tp_class_hist.append(tp_inference)
                if index == 0:
                    boundaries.append(n_samples)
                sentence.append(char)
                anomaly_hist.append(tm.anomaly)
                tp_active_hist.append(SDR(tp.columns))
                stability_hist.append(tp.columns.overlap(tp_prev_active))
                tp_prev_active = SDR(tp.columns)
                n_samples += 1

        plt.figure("ASCII Stability")
        stability_weighted = overlap_stability_weighted(tp_active_hist)
        plt.plot(
                 # np.arange(n_samples)+.5, anomaly_hist,   'ro',
                 # np.arange(n_samples)+.5, stability_hist, 'b-',
                 np.arange(n_samples)+.5, stability_weighted, 'b-',)
        for idx, char in enumerate(sentence):
            plt.text(idx + .5, .01, char, horizontalalignment='center')
        for x in boundaries:
            plt.axvline(x, color='k')
        sorted_dataset = sorted(dataset)
        for idx, word_id in enumerate(tp_class_hist):
            word = sorted_dataset[word_id]
            plt.text(idx + .5, 1., word,
                rotation            = 90,
                horizontalalignment = 'center',
                verticalalignment   = 'top',)
        figure_title = "Output Layer Stability"
        if args.learned_stability:
            figure_title += " - Stability Mechanism Disabled."
        figure_title += "\nInput character at bottom, Classification at top, Vertical lines are word boundries."
        plt.title(figure_title)
        plt.ylabel('Stability')
        plt.xlabel('Time step')
        plt.show()

    if synapses_debug:
        sp.synapses.check_data_integrity()
        tm.synapses.check_data_integrity()
        tp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score
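# mutate_word() is provided elsewhere in this repository.  The sketch below is
# an assumed implementation that injects a single random typo (substitute,
# drop, or swap one character), which is all the --typo option above requires.
import random

def mutate_word(word):
    if len(word) < 2:
        return word
    idx      = random.randrange(len(word))
    alphabet = [chr(ord('A') + i) for i in range(26)]
    roll     = random.random()
    if roll < 1/3:      # Substitute one character.
        return word[:idx] + random.choice(alphabet) + word[idx+1:]
    elif roll < 2/3:    # Drop one character.
        return word[:idx] + word[idx+1:]
    else:               # Swap two adjacent characters.
        idx = min(idx, len(word) - 2)
        return word[:idx] + word[idx+1] + word[idx] + word[idx+2:]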