def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
             callsPerSerialize=CALLS_PER_SERIALIZE):
  self._sdrClassifier = SDRClassifier(steps, alpha, actValueAlpha, verbosity)
  self._sdrClassifierCpp = SDRClassifierCpp(steps, alpha, actValueAlpha,
                                            verbosity)
  self._calls = 0
  self._callsPerSerialize = callsPerSerialize
def sdrClassifierExample():
  # http://nupic.docs.numenta.org/stable/api/algorithms/classifiers.html
  """steps - Sequence of the different steps of multi-step predictions to learn
  alpha - learning rate (larger -> faster learning)
  actValueAlpha - Used to track the actual value within each bucket. A lower
                  actValueAlpha results in longer term memory"""
  c = SDRClassifier(steps=[1], alpha=0.1, actValueAlpha=0.1, verbosity=0)

  # learning
  c.compute(recordNum=0, patternNZ=[1, 5, 9],
            classification={"bucketIdx": 4, "actValue": 34.7},
            learn=True, infer=False)

  # inference
  result = c.compute(recordNum=1, patternNZ=[1, 5, 9],
                     classification={"bucketIdx": 4, "actValue": 34.7},
                     learn=False, infer=True)

  # Print the top three predictions for 1 step out.
  topPredictions = sorted(zip(result[1], result["actualValues"]),
                          reverse=True)[:3]
  for prob, value in topPredictions:
    print("Prediction of {} has prob: {}.".format(value, prob * 100.0))
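# A short, hedged variation on the example above: the same compute() API can
# learn several prediction horizons at once by passing more than one step.
# The returned dict is then keyed by each step (plus "actualValues"). The
# parameter values and record counts here are illustrative only.
c2 = SDRClassifier(steps=[1, 5], alpha=0.1, actValueAlpha=0.1, verbosity=0)
for recordNum in range(10):
  c2.compute(recordNum=recordNum, patternNZ=[1, 5, 9],
             classification={"bucketIdx": 4, "actValue": 34.7},
             learn=True, infer=False)
result = c2.compute(recordNum=10, patternNZ=[1, 5, 9],
                    classification={"bucketIdx": 4, "actValue": 34.7},
                    learn=False, infer=True)
print(result["actualValues"], result[1], result[5])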
def testSoftMaxOverflow(self):
  """
  Test if the softmax normalization overflows
  """
  c = SDRClassifier([1], 1.0, 0.1, 0)
  weight = numpy.array([[sys.float_info.max_exp + 1]])
  res = c.inferSingleStep([0], weight)
  self.assertFalse(numpy.isnan(res), "SoftMax overflow")
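# Why this test exists: a naive softmax exponentiates its inputs directly and
# overflows for weights this large. Below is a minimal, illustrative sketch of
# the standard max-subtraction trick; it is not SDRClassifier's internal code.
import sys
import numpy as np

def stable_softmax(x):
  # Subtracting the max makes every exponent <= 0, so np.exp cannot overflow.
  shifted = x - np.max(x)
  e = np.exp(shifted)
  return e / e.sum()

print(stable_softmax(np.array([sys.float_info.max_exp + 1, 0.0])))  # no NaNs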
def initializeClassifiers(Nelements, encoder):
  claClassifier = CLAClassifier(steps=[0])
  sdrClassifier = SDRClassifier(steps=[0], alpha=0.1)

  patternNZ = list(numpy.where(encoder.encode(Nelements - 1))[0])
  classification = {'bucketIdx': Nelements - 1, 'actValue': Nelements - 1}

  # feed in the pattern with the highest bucket index
  claRetval = claClassifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  sdrRetval = sdrClassifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  return claClassifier, sdrClassifier
class SDRClassifierDiff(object):
  """Classifier-like object that diffs the output from different classifiers.

  Instances of each version of the SDR classifier are created and each call to
  compute is passed to each version of the classifier. The results are diffed
  to make sure that there are no differences.

  Optionally, the classifiers can be serialized and deserialized after a
  specified number of calls to compute to ensure that serialization does not
  cause discrepancies between the results.

  TODO: Check internal state as well.
  TODO: Provide option to write output to a file.
  TODO: Provide a way to record differences without throwing an exception.
  """

  __VERSION__ = 'SDRClassifierDiffV1'

  def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
               callsPerSerialize=CALLS_PER_SERIALIZE):
    self._sdrClassifier = SDRClassifier(steps, alpha, actValueAlpha, verbosity)
    self._sdrClassifierCpp = SDRClassifierCpp(steps, alpha, actValueAlpha,
                                              verbosity)
    self._calls = 0
    self._callsPerSerialize = callsPerSerialize

  def compute(self, recordNum, patternNZ, classification, learn, infer):
    result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification,
                                          learn, infer)
    result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                             classification, learn, infer)
    self._calls += 1
    # Check if it is time to serialize and deserialize.
    if self._calls % self._callsPerSerialize == 0:
      self._sdrClassifier = pickle.loads(pickle.dumps(self._sdrClassifier))
      self._sdrClassifierCpp = pickle.loads(pickle.dumps(
          self._sdrClassifierCpp))
    # Assert both results are the same type.
    assert type(result1) == type(result2)
    # Assert that the keys match.
    assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
        "py result=%s, C++ result=%s" % (result1, result2)
    # Assert that the values match.
    for k, l in result1.iteritems():
      assert type(l) == type(result2[k])
      for i in xrange(len(l)):
        if isinstance(classification['actValue'], numbers.Real):
          assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
              'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
              'value %f.' % (l[i], result2[k][i]))
        else:
          assert l[i] == result2[k][i], (
              'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
              'value %s.' % (str(l[i]), str(result2[k][i])))
    return result1
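# Hedged usage sketch for the diff wrapper above: drive it like an ordinary
# classifier and let its internal asserts compare the Python and C++
# implementations on every call, round-tripping both through serialization
# every callsPerSerialize calls. The parameter values are illustrative only.
diff = SDRClassifierDiff(steps=(1,), alpha=0.1, actValueAlpha=0.3,
                         callsPerSerialize=10)
for recordNum in range(20):
  diff.compute(recordNum, patternNZ=[1, 5, 9],
               classification={"bucketIdx": 4, "actValue": 34.7},
               learn=True, infer=True)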
def read(proto):
  """
  proto: SDRClassifierRegionProto capnproto object
  """
  impl = proto.classifierImp
  if impl == "py":
    return SDRClassifier.read(proto.claClassifier)
  else:
    raise ValueError("Invalid classifier implementation (%r). Value must be "
                     '"py".' % impl)
def read(proto):
  """
  proto: SDRClassifierRegionProto capnproto object
  """
  impl = proto.implementation
  if impl == 'py':
    return SDRClassifier.read(proto.sdrClassifier)
  else:
    raise ValueError('Invalid classifier implementation (%r). Value must be '
                     '"py".' % impl)
def read(proto):
  """
  proto: SDRClassifierRegionProto capnproto object
  """
  impl = proto.classifierImp
  if impl == 'py':
    return SDRClassifier.read(proto.claClassifier)
  else:
    raise ValueError(
        'Invalid classifier implementation (%r). Value must be '
        '"py".' % impl)
def read(proto):
  """
  proto: SDRClassifierRegionProto capnproto object
  """
  impl = proto.implementation
  if impl == "py":
    return SDRClassifier.read(proto.sdrClassifier)
  elif impl == "cpp":
    return FastSDRClassifier.read(proto.sdrClassifier)
  else:
    raise ValueError("Invalid classifier implementation (%r). Value must be "
                     '"py" or "cpp".' % impl)
def testWriteRead(self):
  c1 = SDRClassifier([0], 0.1, 0.1, 0)

  # Create a vector of input bit indices
  input1 = [1, 5, 9]
  result = c1.compute(recordNum=0,
                      patternNZ=input1,
                      classification={'bucketIdx': 4, 'actValue': 34.7},
                      learn=True, infer=True)

  proto1 = SdrClassifier_capnp.SdrClassifierProto.new_message()
  c1.write(proto1)

  # Write the proto to a temp file and read it back into a new proto
  with tempfile.TemporaryFile() as f:
    proto1.write(f)
    f.seek(0)
    proto2 = SdrClassifier_capnp.SdrClassifierProto.read(f)

  # Load the deserialized proto
  c2 = SDRClassifier.read(proto2)

  self.assertEqual(c1.steps, c2.steps)
  self.assertAlmostEqual(c1.alpha, c2.alpha)
  self.assertAlmostEqual(c1.actValueAlpha, c2.actValueAlpha)
  self.assertEqual(c1._learnIteration, c2._learnIteration)
  self.assertEqual(c1._recordNumMinusLearnIteration,
                   c2._recordNumMinusLearnIteration)
  self.assertEqual(c1._patternNZHistory, c2._patternNZHistory)
  self.assertEqual(c1._weightMatrix.keys(), c2._weightMatrix.keys())
  for step in c1._weightMatrix.keys():
    c1Weight = c1._weightMatrix[step]
    c2Weight = c2._weightMatrix[step]
    self.assertSequenceEqual(list(c1Weight.flatten()),
                             list(c2Weight.flatten()))
  self.assertEqual(c1._maxBucketIdx, c2._maxBucketIdx)
  self.assertEqual(c1._maxInputIdx, c2._maxInputIdx)
  self.assertEqual(len(c1._actualValues), len(c2._actualValues))
  for i in xrange(len(c1._actualValues)):
    self.assertAlmostEqual(c1._actualValues[i], c2._actualValues[i], 5)
  self.assertEqual(c1._version, c2._version)
  self.assertEqual(c1.verbosity, c2.verbosity)

  result1 = c1.compute(recordNum=1,
                       patternNZ=input1,
                       classification={'bucketIdx': 4, 'actValue': 34.7},
                       learn=True, infer=True)
  result2 = c2.compute(recordNum=1,
                       patternNZ=input1,
                       classification={'bucketIdx': 4, 'actValue': 34.7},
                       learn=True, infer=True)
  self.assertEqual(result1.keys(), result2.keys())
  for key in result1.keys():
    for i in xrange(len(c1._actualValues)):
      self.assertAlmostEqual(result1[key][i], result2[key][i], 5)
def read(proto):
  """
  :param proto: SDRClassifierRegionProto capnproto object
  """
  impl = proto.implementation
  if impl == 'py':
    return SDRClassifier.read(proto.sdrClassifier)
  elif impl == 'cpp':
    return FastSDRClassifier.read(proto.sdrClassifier)
  else:
    raise ValueError(
        'Invalid classifier implementation (%r). Value must be '
        '"py" or "cpp".' % impl)
def read(proto):
  """
  :param proto: SDRClassifierRegionProto capnproto object
  """
  impl = proto.implementation
  if impl == 'py':
    return SDRClassifier.read(proto.sdrClassifier)
  elif impl == 'cpp':
    return FastSDRClassifier.read(proto.sdrClassifier)
  elif impl == 'diff':
    return SDRClassifierDiff.read(proto.sdrClassifier)
  else:
    raise ValueError('Invalid classifier implementation (%r). Value must be '
                     '"py", "cpp" or "diff".' % impl)
def create(*args, **kwargs):
  """
  Create a SDR classifier factory.
  The implementation of the SDR Classifier can be specified with
  the "implementation" keyword argument.

  The SDRClassifierFactory uses the implementation as specified in
  src/nupic/support/nupic-default.xml
  """
  impl = kwargs.pop('implementation', None)
  if impl is None:
    impl = Configuration.get('nupic.opf.sdrClassifier.implementation')
  if impl == 'py':
    return SDRClassifier(*args, **kwargs)
  else:
    raise ValueError(
        'Invalid classifier implementation (%r). Value must be '
        '"py".' % impl)
def __init__(self, model_params):
  # Init an HTM network
  self.network = Network()

  # Getting parameters for network regions
  self.sensor_params = model_params['Sensor']
  self.spatial_pooler_params = model_params['SpatialPooler']
  self.temporal_memory_params = model_params['TemporalMemory']
  self.classifiers_params = model_params['Classifiers']
  self.encoders_params = model_params['Encoders']

  # Adding regions to HTM network
  self.network.addRegion('DurationEncoder', 'ScalarSensor',
                         json.dumps(self.encoders_params['duration']))
  self.network.addRegion('VelocityEncoder', 'ScalarSensor',
                         json.dumps(self.encoders_params['pitch']))
  self.network.addRegion('PitchEncoder', 'ScalarSensor',
                         json.dumps(self.encoders_params['velocity']))
  self.network.addRegion('SpatialPooler', 'py.SPRegion',
                         json.dumps(self.spatial_pooler_params))
  self.network.addRegion('TemporalMemory', 'py.TMRegion',
                         json.dumps(self.temporal_memory_params))

  # Creating outer classifiers for multifield prediction
  dclp = self.classifiers_params['duration']
  vclp = self.classifiers_params['pitch']
  pclp = self.classifiers_params['velocity']

  self.duration_classifier = SDRClassifier(
      steps=(1,),
      verbosity=dclp['verbosity'],
      alpha=dclp['alpha'],
      actValueAlpha=dclp['actValueAlpha'])
  self.velocity_classifier = SDRClassifier(
      steps=(1,),
      verbosity=vclp['verbosity'],
      alpha=vclp['alpha'],
      actValueAlpha=vclp['actValueAlpha'])
  self.pitch_classifier = SDRClassifier(
      steps=(1,),
      verbosity=pclp['verbosity'],
      alpha=pclp['alpha'],
      actValueAlpha=pclp['actValueAlpha'])

  self._link_all_regions()
  self._enable_learning()
  self._enable_inference()

  self.network.initialize()
def create(*args, **kwargs):
  """
  Create a SDR classifier factory.
  The implementation of the SDR Classifier can be specified with
  the "implementation" keyword argument.

  The SDRClassifierFactory uses the implementation as specified in
  `Default NuPIC Configuration <default-config.html>`_.
  """
  impl = kwargs.pop('implementation', None)
  if impl is None:
    impl = Configuration.get('nupic.opf.sdrClassifier.implementation')
  if impl == 'py':
    return SDRClassifier(*args, **kwargs)
  elif impl == 'cpp':
    return FastSDRClassifier(*args, **kwargs)
  else:
    raise ValueError(
        'Invalid classifier implementation (%r). Value must be '
        '"py" or "cpp".' % impl)
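# A typical call site for the factory method above, assuming it is exposed as
# nupic.algorithms.sdr_classifier_factory.SDRClassifierFactory (hedged: verify
# the import path for your NuPIC version). Passing implementation= overrides
# the value read from the NuPIC configuration; the remaining kwargs are
# forwarded to the chosen classifier's constructor.
from nupic.algorithms.sdr_classifier_factory import SDRClassifierFactory

classifier = SDRClassifierFactory.create(steps=[1], alpha=0.1,
                                         actValueAlpha=0.3, verbosity=0,
                                         implementation='py')
classifier.compute(recordNum=0, patternNZ=[1, 5, 9],
                   classification={'bucketIdx': 4, 'actValue': 34.7},
                   learn=True, infer=False)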
def main():
  # cluster similar inputs together in SDR space
  s = SpatialPooler()
  print(type(s))
  # powerful sequence memory in SDR space
  t = TemporalMemory()
  print(type(t))
  # computes a rolling Gaussian based on raw anomaly scores and then their
  # likelihood
  a = AnomalyLikelihood()
  print(type(a))
  # temporally groups active cell sets from TM
  u = UnionTemporalPooler()
  print(type(u))
  # learns pairings of Union representations and labeled classes
  c = SDRClassifier()
  print(type(c))
def compute(self, recordNum, patternNZ, classification, learn, infer):
  result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification,
                                        learn, infer)
  result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                           classification, learn, infer)
  self._calls += 1
  # Check if it is time to serialize and deserialize.
  if self._calls % self._callsPerSerialize == 0:
    schemaPy = self._sdrClassifier.getSchema()
    protoPy = schemaPy.new_message()
    self._sdrClassifier.write(protoPy)
    protoPy = schemaPy.from_bytes(protoPy.to_bytes())
    self._sdrClassifier = SDRClassifier.read(protoPy)

    schemaCpp = self._sdrClassifierCpp.getSchema()
    protoCpp = schemaCpp.new_message()
    self._sdrClassifierCpp.write(protoCpp)
    protoCpp = schemaCpp.from_bytes(protoCpp.to_bytes())
    self._sdrClassifierCpp = SDRClassifierCpp.read(protoCpp)
  # Assert both results are the same type.
  assert type(result1) == type(result2)
  # Assert that the keys match.
  assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
      "py result=%s, C++ result=%s" % (result1, result2)
  # Assert that the values match.
  for k, l in result1.items():
    assert type(l) == type(result2[k])
    for i in range(len(l)):
      if isinstance(classification['actValue'], numbers.Real):
        assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
            'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
            'value %f.' % (l[i], result2[k][i]))
      else:
        assert l[i] == result2[k][i], (
            'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
            'value %s.' % (str(l[i]), str(result2[k][i])))
  return result1
def compute(self, recordNum, patternNZ, classification, learn, infer):
  result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification,
                                        learn, infer)
  result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                           classification, learn, infer)
  self._calls += 1
  # Check if it is time to serialize and deserialize.
  if self._calls % self._callsPerSerialize == 0:
    schemaPy = self._sdrClassifier.getSchema()
    protoPy = schemaPy.new_message()
    self._sdrClassifier.write(protoPy)
    protoPy = schemaPy.from_bytes(protoPy.to_bytes())
    self._sdrClassifier = SDRClassifier.read(protoPy)

    schemaCpp = self._sdrClassifierCpp.getSchema()
    protoCpp = schemaCpp.new_message()
    self._sdrClassifierCpp.write(protoCpp)
    protoCpp = schemaCpp.from_bytes(protoCpp.to_bytes())
    self._sdrClassifierCpp = SDRClassifierCpp.read(protoCpp)
  # Assert both results are the same type.
  assert type(result1) == type(result2)
  # Assert that the keys match.
  assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
      "py result=%s, C++ result=%s" % (result1, result2)
  # Assert that the values match.
  for k, l in result1.iteritems():
    assert type(l) == type(result2[k])
    for i in xrange(len(l)):
      if isinstance(classification['actValue'], numbers.Real):
        assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
            'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
            'value %f.' % (l[i], result2[k][i]))
      else:
        assert l[i] == result2[k][i], (
            'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
            'value %s.' % (str(l[i]), str(result2[k][i])))
  return result1
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.algorithms.sdr_classifier import SDRClassifier

N = 900
x = np.sin(np.arange(N) * 2 * np.pi / 30.0)

inputDimensions = (256,)
columnDimensions = (512,)

encoder = ScalarEncoder(21, -1.0, 1.0, n=inputDimensions[0])
sp = SpatialPooler(inputDimensions=inputDimensions,
                   columnDimensions=columnDimensions,
                   globalInhibition=True,
                   numActiveColumnsPerInhArea=21)
tm = TemporalMemory(columnDimensions=columnDimensions)
c = SDRClassifier(steps=[1], alpha=0.1, actValueAlpha=0.1, verbosity=0)

x_true = x[1:]
x_predict = np.zeros(len(x) - 1)

for i, xi in tqdm(enumerate(x[:-1])):
    encoded = encoder.encode(xi)
    bucketIdx = np.where(encoded > 0)[0][0]

    spd = np.zeros(columnDimensions[0])
    sp.compute(encoded, True, spd)
    active_indices = np.where(spd > 0)[0]

    tm.compute(active_indices)
    active_cell_indices = tm.getActiveCells()
    predictive_cell_indices = tm.getPredictiveCells()
    patternNZ = np.asarray(active_cell_indices)
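    # Hedged continuation sketch: the snippet above is truncated at this point.
    # A typical next step (illustrative only, not the original author's code,
    # and assuming the numpy import `np` from the surrounding snippet) feeds
    # the active cells into the classifier and records the 1-step prediction
    # so it can later be compared against x_true.
    result = c.compute(recordNum=i, patternNZ=patternNZ,
                       classification={"bucketIdx": bucketIdx, "actValue": xi},
                       learn=True, infer=True)
    x_predict[i] = result["actualValues"][np.argmax(result[1])]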
def main(parameters=default_parameters, argv=None, verbose=True): parser = argparse.ArgumentParser() parser.add_argument( '-t', '--time', type=float, default=1, help='Number of times to run through the training data.') parser.add_argument('--debug', action='store_true') args = parser.parse_args(args=argv) # Load data. train_labels, train_images, test_labels, test_images = load_mnist() if False: # Experiment to verify that input dimensions are handled correctly If # you enable this, don't forget to rescale the radii as well as the # input. from scipy.ndimage import zoom new_sz = (1, 4, 1) train_images = [zoom(im, new_sz, order=0) for im in train_images] test_images = [zoom(im, new_sz, order=0) for im in test_images] training_data = list(zip(train_images, train_labels)) test_data = list(zip(test_images, test_labels)) random.shuffle(training_data) random.shuffle(test_data) if args.debug and args.time < 1: test_data = test_data[:int(len(test_data) * args.time)] # Setup spatial pooler machine. enc = BWImageEncoder(train_images[0].shape[:2]) sp = SpatialPooler(input_sdr=enc.output, segments=1, **parameters) sdrc = SDRClassifier(steps=[0]) if verbose: print(sp.statistics()) # Training Loop train_cycles = len(train_images) * args.time if verbose: print("Training for %d cycles" % train_cycles) for i in range(int(round(train_cycles))): sp.reset() img, lbl = random.choice(training_data) img = synthesize(img, diag=False) enc.encode(np.squeeze(img)) sp.compute() sdrc.compute(i, sp.columns.flat_index, classification={ "bucketIdx": lbl, "actValue": lbl }, learn=True, infer=False) if verbose: print("Done training.") print("") print("Removing zero permanence synapses.") sp.synapses.remove_zero_permanence_synapses() print(sp.statistics()) # Testing Loop if verbose: print("Testing for %d cycles." % len(test_data)) score = 0 for img, lbl in test_data: enc.encode(np.squeeze(img)) sp.compute(learn=False) try: inference = sdrc.infer(sp.columns.flat_index, None)[0] except IndexError: inference = np.zeros(10) if lbl == np.argmax(inference): score += 1 print('Score:', 100 * score / len(test_data), '%') if synapses_debug: sp.synapses.check_data_integrity() print("Synapse data structure integrity is OK.") return score / len(test_data)
class HTMusicModel(object): def __init__(self, model_params): # Init an HTM network self.network = Network() # Getting parameters for network regions self.sensor_params = model_params['Sensor'] self.spatial_pooler_params = model_params['SpatialPooler'] self.temporal_memory_params = model_params['TemporalMemory'] self.classifiers_params = model_params['Classifiers'] self.encoders_params = model_params['Encoders'] # Adding regions to HTM network self.network.addRegion('DurationEncoder', 'ScalarSensor', json.dumps(self.encoders_params['duration'])) self.network.addRegion('VelocityEncoder', 'ScalarSensor', json.dumps(self.encoders_params['pitch'])) self.network.addRegion('PitchEncoder', 'ScalarSensor', json.dumps(self.encoders_params['velocity'])) self.network.addRegion('SpatialPooler', 'py.SPRegion', json.dumps(self.spatial_pooler_params)) self.network.addRegion('TemporalMemory', 'py.TMRegion', json.dumps(self.temporal_memory_params)) # Creating outer classifiers for multifield prediction dclp = self.classifiers_params['duration'] vclp = self.classifiers_params['pitch'] pclp = self.classifiers_params['velocity'] self.duration_classifier = SDRClassifier( steps=(1, ), verbosity=dclp['verbosity'], alpha=dclp['alpha'], actValueAlpha=dclp['actValueAlpha']) self.velocity_classifier = SDRClassifier( steps=(1, ), verbosity=vclp['verbosity'], alpha=vclp['alpha'], actValueAlpha=vclp['actValueAlpha']) self.pitch_classifier = SDRClassifier( steps=(1, ), verbosity=pclp['verbosity'], alpha=pclp['alpha'], actValueAlpha=pclp['actValueAlpha']) self._link_all_regions() self._enable_learning() self._enable_inference() self.network.initialize() def _link_all_regions(self): # Linking regions self.network.link('DurationEncoder', 'SpatialPooler', 'UniformLink', '') self.network.link('VelocityEncoder', 'SpatialPooler', 'UniformLink', '') self.network.link('PitchEncoder', 'SpatialPooler', 'UniformLink', '') self.network.link('SpatialPooler', 'TemporalMemory', 'UniformLink', '', srcOutput='bottomUpOut', destInput='bottomUpIn') def _enable_learning(self): # Enable learning for all regions. self.network.regions["SpatialPooler"].setParameter("learningMode", 1) self.network.regions["TemporalMemory"].setParameter("learningMode", 1) def _enable_inference(self): # Enable inference for all regions. 
self.network.regions["SpatialPooler"].setParameter("inferenceMode", 1) self.network.regions["TemporalMemory"].setParameter("inferenceMode", 1) def train(self, duration, pitch, velocity): records_total = self.network.regions['SpatialPooler'].getSelf( ).getAlgorithmInstance().getIterationNum() self.network.regions['DurationEncoder'].setParameter( 'sensedValue', duration) self.network.regions['PitchEncoder'].setParameter('sensedValue', pitch) self.network.regions['VelocityEncoder'].setParameter( 'sensedValue', velocity) self.network.run(1) # Getting active cells of TM and bucket indicies of encoders to feed classifiers active_cells = numpy.array( self.network.regions['TemporalMemory'].getOutputData( 'bottomUpOut')).nonzero()[0] duration_bucket = numpy.array( self.network.regions['DurationEncoder'].getOutputData('bucket')) pitch_bucket = numpy.array( self.network.regions['PitchEncoder'].getOutputData('bucket')) velocity_bucket = numpy.array( self.network.regions['VelocityEncoder'].getOutputData('bucket')) duration_classifier_result = self.duration_classifier.compute( recordNum=records_total, patternNZ=active_cells, classification={ 'bucketIdx': duration_bucket[0], 'actValue': duration }, learn=True, infer=False) pitch_classifier_result = self.pitch_classifier.compute( recordNum=records_total, patternNZ=active_cells, classification={ 'bucketIdx': pitch_bucket[0], 'actValue': pitch }, learn=True, infer=False) velocity_classifier_result = self.velocity_classifier.compute( recordNum=records_total, patternNZ=active_cells, classification={ 'bucketIdx': velocity_bucket[0], 'actValue': velocity }, learn=True, infer=False) def generate(self, seed, output_dir, event_amount): records_total = self.network.regions['SpatialPooler'].getSelf( ).getAlgorithmInstance().getIterationNum() seed = seed midi = pretty_midi.PrettyMIDI() midi_program = pretty_midi.instrument_name_to_program( 'Acoustic Grand Piano') piano = pretty_midi.Instrument(program=midi_program) clock = 0 for iters in tqdm(range(records_total, records_total + event_amount)): duration = seed[0] pitch = seed[1] velocity = seed[2] self.network.regions['DurationEncoder'].setParameter( 'sensedValue', duration) self.network.regions['PitchEncoder'].setParameter( 'sensedValue', pitch) self.network.regions['VelocityEncoder'].setParameter( 'sensedValue', velocity) self.network.run(1) # Getting active cells of TM and bucket indicies of encoders to feed classifiers active_cells = numpy.array( self.network.regions['TemporalMemory'].getOutputData( 'bottomUpOut')).nonzero()[0] duration_bucket = numpy.array( self.network.regions['DurationEncoder'].getOutputData( 'bucket')) pitch_bucket = numpy.array( self.network.regions['PitchEncoder'].getOutputData('bucket')) velocity_bucket = numpy.array( self.network.regions['VelocityEncoder'].getOutputData( 'bucket')) # Getting up classifiers result duration_classifier_result = self.duration_classifier.compute( recordNum=records_total, patternNZ=active_cells, classification={ 'bucketIdx': duration_bucket[0], 'actValue': duration }, learn=False, infer=True) pitch_classifier_result = self.pitch_classifier.compute( recordNum=records_total, patternNZ=active_cells, classification={ 'bucketIdx': pitch_bucket[0], 'actValue': pitch }, learn=False, infer=True) velocity_classifier_result = self.velocity_classifier.compute( recordNum=records_total, patternNZ=active_cells, classification={ 'bucketIdx': velocity_bucket[0], 'actValue': velocity }, learn=False, infer=True) du = duration_classifier_result[1].argmax() pi = 
pitch_classifier_result[1].argmax() ve = velocity_classifier_result[1].argmax() duration_top_probs = duration_classifier_result[1][ 0:2] / duration_classifier_result[1][0:2].sum() predicted_duration = duration_classifier_result['actualValues'][du] # predicted_duration = duration_classifier_result['actualValues'][du] predicted_pitch = pitch_classifier_result['actualValues'][pi] predicted_velocity = velocity_classifier_result['actualValues'][ve] # print duration_classifier_result note = pretty_midi.Note(velocity=int(predicted_velocity), pitch=int(predicted_pitch), start=float(clock), end=float(clock + predicted_duration)) piano.notes.append(note) clock = clock + 0.25 seed[0] = predicted_duration seed[1] = predicted_pitch seed[2] = predicted_velocity midi.instruments.append(piano) midi.remove_invalid_notes() time = datetime.datetime.now().strftime('%Y-%m-%d %H:%m:%S') midi.write(output_dir + time + '.mid') def load_model(self, load_path): # Loading SpatialPooler print 'Loading SpatialPooler' with open(load_path + 'sp.bin', 'rb') as sp: sp_builder = SpatialPoolerProto.read( sp, traversal_limit_in_words=2**61) self.network.regions['SpatialPooler'].getSelf( )._sfdr = self.network.regions['SpatialPooler'].getSelf()._sfdr.read( sp_builder) # Loading TemporalMemory print 'Loading TemporalMemory' self.network.regions['TemporalMemory'].getSelf().getAlgorithmInstance( ).loadFromFile(load_path + 'tm.bin') # Loading end classifier print 'Loading duration classifier' with open(load_path + 'dcl.bin', 'rb') as dcl: dcl_builder = SdrClassifierProto.read( dcl, traversal_limit_in_words=2**61) self.duration_classifier = self.duration_classifier.read(dcl_builder) # Loading pitch classifier print 'Loading pitch classifier' with open(load_path + 'pcl.bin', 'rb') as pcl: pcl_builder = SdrClassifierProto.read( pcl, traversal_limit_in_words=2**61) self.pitch_classifier = self.pitch_classifier.read(pcl_builder) # Loading velocity classifier print 'Loading velocity classifier' with open(load_path + 'vcl.bin', 'rb') as vcl: vcl_builder = SdrClassifierProto.read( vcl, traversal_limit_in_words=2**61) self.velocity_classifier = self.velocity_classifier.read(vcl_builder) def save_model(self, save_path): # Saving SpatialPooler print 'Saving SpatialPooler' sp_builder = SpatialPoolerProto.new_message() self.network.regions['SpatialPooler'].getSelf().getAlgorithmInstance( ).write(sp_builder) with open(save_path + 'sp.bin', 'w+b') as sp: sp_builder.write(sp) # Saving TemporalMemory print 'Saving TemporalMemory' self.network.regions['TemporalMemory'].getSelf().getAlgorithmInstance( ).saveToFile(save_path + 'tm.bin') # Saving end classifier print 'Saving duration classifier' dcl_builder = SdrClassifierProto.new_message() self.duration_classifier.write(dcl_builder) with open(save_path + 'dcl.bin', 'w+b') as dcl: dcl_builder.write(dcl) # Saving pitch classifier print 'Saving pitch classifier' pcl_builder = SdrClassifierProto.new_message() self.pitch_classifier.write(pcl_builder) with open(save_path + 'pcl.bin', 'w+b') as pcl: pcl_builder.write(pcl) # Saving velocity classifier print 'Saving velocity classifier' vcl_builder = SdrClassifierProto.new_message() self.velocity_classifier.write(vcl_builder) with open(save_path + 'vcl.bin', 'w+b') as vcl: vcl_builder.write(vcl)
class SDRClassifierDiff(object): """Classifier-like object that diffs the output from different classifiers. Instances of each version of the SDR classifier are created and each call to compute is passed to each version of the classifier. The results are diffed to make sure the there are no differences. Optionally, the classifiers can be serialized and deserialized after a specified number of calls to compute to ensure that serialization does not cause discrepencies between the results. TODO: Check internal state as well. TODO: Provide option to write output to a file. TODO: Provide record differences without throwing an exception. """ __VERSION__ = 'SDRClassifierDiffV1' def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0, callsPerSerialize=CALLS_PER_SERIALIZE): self._sdrClassifier = SDRClassifier(steps, alpha, actValueAlpha, verbosity) self._sdrClassifierCpp = SDRClassifierCpp(steps, alpha, actValueAlpha, verbosity) self._calls = 0 self._callsPerSerialize = callsPerSerialize def compute(self, recordNum, patternNZ, classification, learn, infer): result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification, learn, infer) result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ, classification, learn, infer) self._calls += 1 # Check if it is time to serialize and deserialize. if self._calls % self._callsPerSerialize == 0: schemaPy = self._sdrClassifier.getSchema() protoPy = schemaPy.new_message() self._sdrClassifier.write(protoPy) protoPy = schemaPy.from_bytes(protoPy.to_bytes()) self._sdrClassifier = SDRClassifier.read(protoPy) schemaCpp = self._sdrClassifierCpp.getSchema() protoCpp = schemaCpp.new_message() self._sdrClassifierCpp.write(protoCpp) protoCpp = schemaCpp.from_bytes(protoCpp.to_bytes()) self._sdrClassifierCpp = SDRClassifierCpp.read(protoCpp) # Assert both results are the same type. assert type(result1) == type(result2) # Assert that the keys match. assert set(result1.keys()) == set(result2.keys()), "diff detected: " \ "py result=%s, C++ result=%s" % (result1, result2) # Assert that the values match. for k, l in result1.iteritems(): assert type(l) == type(result2[k]) for i in xrange(len(l)): if isinstance(classification['actValue'], numbers.Real): assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, ( 'Python SDRClassifier has value %f and C++ SDRClassifierCpp has ' 'value %f.' % (l[i], result2[k][i])) else: assert l[i] == result2[k][i], ( 'Python SDRClassifier has value %s and C++ SDRClassifierCpp has ' 'value %s.' % (str(l[i]), str(result2[k][i]))) return result1
def main(parameters=default_parameters, argv=None, verbose=True): parser = argparse.ArgumentParser() parser.add_argument( '--episode_length', type=int, default=100, ) parser.add_argument( '--train_episodes', type=int, default=100 * len(patterns), ) parser.add_argument( '--test_episodes', type=int, default=20 * len(patterns), ) parser.add_argument( '--environment_size', type=int, default=40, ) parser.add_argument('--move_env', action='store_true') parser.add_argument('--show_pattern', action='store_true') args = parser.parse_args(args=argv) # PARAMETER OVERRIDES! parameters['grid_cells'] = default_parameters['grid_cells'] if verbose: import pprint print("Parameters = ", end='') pprint.pprint(parameters) print("Episode Length", args.episode_length) env = Environment(size=args.environment_size) gc = GridCellEncoder(**parameters['grid_cells']) trajectory = TemporalMemory(column_sdr=gc.grid_cells, context_sdr=None, anomaly_alpha=1. / 1000, predicted_boost=1, segments_per_cell=20, **parameters['trajectory']) trajectory_sdrc = SDRClassifier(steps=[0]) motion = StableSpatialPooler(input_sdr=SDR(trajectory.active), **parameters['motion']) motion_sdrc = SDRClassifier(steps=[0]) def reset(): env.reset() gc.reset() trajectory.reset() motion.reset() env_offset = np.zeros(2) def compute(learn=True): gc_sdr = gc.encode(env.position + env_offset) trajectory.compute( column_sdr=gc_sdr, learn=learn, ) motion.compute( input_sdr=trajectory.active, input_learning_sdr=trajectory.learning, learn=learn, ) # Train if verbose: print("Training for %d episodes ..." % args.train_episodes) start_time = time.time() for session in range(args.train_episodes): reset() pattern = random.randrange(len(patterns)) pattern_func = patterns[pattern] for step in range(args.episode_length): angle = pattern_func(env.angle * 180 / math.pi, motion.age) * math.pi / 180 env.move(angle) if env.collision: reset() continue compute() trajectory_sdrc.compute(trajectory.age, trajectory.learning.flat_index, classification={ "bucketIdx": pattern, "actValue": pattern }, learn=True, infer=False) motion_sdrc.compute(motion.age, motion.columns.flat_index, classification={ "bucketIdx": pattern, "actValue": pattern }, learn=True, infer=False) if verbose and motion.age % 10000 == 0: print("Cycle %d" % motion.age) if args.show_pattern: env.plot_course() if verbose: train_time = time.time() - start_time start_time = time.time() print("Elapsed time (training): %d seconds." % int(round(train_time))) print("") print("Trajectory", trajectory.statistics()) print("Motion", motion.statistics()) print("") # Test if verbose: print("Testing for %d episodes ..." 
% args.test_episodes) if args.move_env: env_offset = np.array([9 * env.size, 9 * env.size]) if verbose: print("Moved to new environment.") trajectory_accuracy = 0 motion_accuracy = 0 sample_size = 0 trajectory_confusion = np.zeros((len(patterns), len(patterns))) motion_confusion = np.zeros((len(patterns), len(patterns))) for episode in range(args.test_episodes): reset() pattern = random.randrange(len(patterns)) pattern_func = patterns[pattern] for step in range(args.episode_length): angle = pattern_func(env.angle * 180 / math.pi, motion.age) * math.pi / 180 env.move(angle) if env.collision: reset() continue compute(learn=True) trajectory_inference = trajectory_sdrc.infer( trajectory.learning.flat_index, None)[0] if pattern == np.argmax(trajectory_inference): trajectory_accuracy += 1 trajectory_confusion[pattern][np.argmax(trajectory_inference)] += 1 motion_inference = motion_sdrc.infer(motion.columns.flat_index, None)[0] if pattern == np.argmax(motion_inference): motion_accuracy += 1 motion_confusion[pattern][np.argmax(motion_inference)] += 1 sample_size += 1 trajectory_accuracy /= sample_size motion_accuracy /= sample_size if verbose: print("Trajectory Accuracy %g, %d catagories." % (trajectory_accuracy, len(patterns))) print("Motion Accuracy %g" % motion_accuracy) # Display Confusion Matixes if verbose: conf_matrices = ( trajectory_confusion, motion_confusion, ) conf_titles = ( 'Trajectory', 'Motion', ) # plt.figure("Pattern Recognition Confusion") for subplot_idx, matrix_title in enumerate( zip(conf_matrices, conf_titles)): matrix, title = matrix_title plt.subplot(1, len(conf_matrices), subplot_idx + 1) plt.title(title + " Confusion") matrix_sum = np.sum(matrix, axis=1) matrix_sum[matrix_sum == 0] = 1 matrix = (matrix.T / matrix_sum).T plt.imshow(matrix, interpolation='nearest') plt.xlabel('Prediction') plt.ylabel('Label') if synapses_debug: gc.synapses.check_data_integrity() trajectory.synapses.check_data_integrity() motion.synapses.check_data_integrity() print("Synapse data structure integrity is OK.") if verbose: test_time = time.time() - start_time print("Elapsed time (testing): %d seconds." % int(round(test_time))) plt.show() return motion_accuracy
printTPRegionParams(model._getTPRegion())

inputData = "%s/%s.csv" % (DATA_DIR, dataSet.replace(" ", "_"))

sensor = model._getSensorRegion()
encoderList = sensor.getSelf().encoder.getEncoderList()
if sensor.getSelf().disabledEncoder is not None:
  classifier_encoder = sensor.getSelf().disabledEncoder.getEncoderList()
  classifier_encoder = classifier_encoder[0]
else:
  classifier_encoder = None

# initialize new SDR classifier
numTMcells = model._getTPRegion().getSelf()._tfdr.numberOfCells()
sdrClassifier = SDRClassifier(steps=[5], alpha=0.005)

_METRIC_SPECS = getMetricSpecs(predictedField, stepsAhead=_options.stepsAhead)
metric = metrics.getModule(_METRIC_SPECS[0])
metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                model.getInferenceType())

if plot:
  plotCount = 1
  plotHeight = max(plotCount * 3, 6)
  fig = plt.figure(figsize=(14, plotHeight))
  gs = gridspec.GridSpec(plotCount, 1)
  plt.title(predictedField)
  plt.ylabel('Data')
  plt.xlabel('Time')
  plt.tight_layout()
def main(parameters=default_parameters, argv=None, verbose=True): parser = argparse.ArgumentParser() parser.add_argument('-t', '--time', type=int, default=20, help='Number of times to run through the training data.') parser.add_argument('--dataset', choices=('states', 'dictionary', 'gutenberg'), default='states') parser.add_argument('--words', type=int, default=500, help='Number of words to use.') parser.add_argument('--typo', type=float, default=0., help='Misspell words, percentage [0-1], default 0.') parser.add_argument('--practice', type=int, default=0, help='Makes the task easier by repeating words.') parser.add_argument('--learned_stability', action='store_true', help='Disable the stability mechanism during tests.') parser.add_argument('--disable_tm_sdrc', action='store_true',) args = parser.parse_args(args = argv) assert(parameters['tp_nz_value'] > 0) if verbose: print("Parameters = ", end='') import pprint pprint.pprint(parameters) print("") # Load dataset. The dataset consists of three variables: # 1) training_data is a list of words. # 2) testing_data is a list of words. # 3) dataset is dictionary of word -> identifier pairs. if args.dataset == 'states': # Remove spaces from between the two word states names. dataset = [word.replace(' ', '') for word in state_names] training_data = dataset * args.time testing_data = dataset * 5 random.shuffle(training_data) random.shuffle(testing_data) if verbose: print("Dataset is %d state names."%len(dataset)) elif args.dataset == 'dictionary': dataset = read_dictionary() dataset = random.sample(dataset, args.words) training_data = dataset * args.time testing_data = dataset * 5 random.shuffle(training_data) random.shuffle(testing_data) if verbose: print("Dataset is %d dictionary words."%len(dataset)) elif args.dataset == 'gutenberg': text = read_gutenberg(args.time) split = int(.80 * len(text)) # Fraction of data to train on. training_data = text[ : split] testing_data = text[split : ] # Put the most common words into the dataset to be trained & tested on. histogram = {} for word in training_data: if word not in histogram: histogram[word] = 0 histogram[word] += 1 histogram.pop('S', None) # Remove apostrophy 'S'. dataset = sorted(histogram, key = lambda word: histogram[word]) dataset = dataset[ -args.words : ] if verbose: print("Dataset is %d words from Project Gutenberg."%len(dataset)) unique_train = len(set(training_data)) unique_test = len(set(testing_data)) print("Unique words in training data %d, testing data %d"%(unique_train, unique_test)) dataset = {word: idx for idx, word in enumerate(sorted(set(dataset)))} if verbose: print("Training data %d words, %g%% dataset coverage."%( len(training_data), 100. * sum(1 for w in training_data if w in dataset) / len(dataset))) print("Testing data %d words, %g%% dataset coverage."%( len(testing_data), 100. * sum(1 for w in testing_data if w in dataset) / len(dataset))) print("Dataset: " + ", ".join('%d) %s'%(dataset[word], word) for word in sorted(dataset))) if args.practice: insertion_point = int(len(training_data) / 2) practice_dataset = list(dataset) random.shuffle(practice_dataset) for word in practice_dataset: for attempt in range(args.practice): training_data.insert(insertion_point, word) # Construct TM. 
diagnostics_alpha = parameters['sp']['boosting_alpha'] enc = EnumEncoder(**parameters['enc']) enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha = diagnostics_alpha) sp = SpatialPooler( input_sdr = enc.output_sdr, **parameters['sp']) tm = TemporalMemory( column_sdr = sp.columns, context_sdr = SDR((parameters['tp']['mini_columns'],)), anomaly_alpha = diagnostics_alpha, **parameters['tm']) if not args.disable_tm_sdrc: tm_sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc']) tm_sdrc.compute(-1, [tm.active.size-1], # Initialize the SDRCs internal table. classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]}, learn=True, infer=False) tp = StableSpatialPooler( input_sdr = tm.active, macro_columns = (1,), **parameters['tp']) tp_sdrc = SDRClassifier(steps=[0], **parameters['tp_sdrc']) tp_sdrc.compute(-1, [tp.columns.size-1], # Initialize the SDRCs internal table. classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]}, learn=True, infer=False) def reset(): enc.output_sdr.zero() sp.reset() tm.reset() tp.reset() def compute(char, learn): enc.encode(char) sp.compute(learn=learn) tm.context_sdr.flat_index = tp.columns.flat_index tm.context_sdr.nz_values.fill(parameters['tp_nz_value']) tm.compute(learn=learn) tp.compute(learn=learn, input_learning_sdr = tm.learning,) # TRAIN if verbose: train_cycles = sum(len(w) for w in training_data) iterations = len(training_data) / len(dataset) print("Training for %d cycles (%d dataset iterations)"%(train_cycles, iterations)) reset() for word in training_data: for idx, char in enumerate(word): compute(char, learn=True) # Process each word before training on the final character. try: label = dataset[word] except KeyError: continue if len(tm.learning) and not args.disable_tm_sdrc: tm_sdrc.compute(tm.age, tm.learning.flat_index, classification={"bucketIdx": label, "actValue": label}, learn=True, infer=False) if len(tp.columns): tp_sdrc.compute(tp.age, tp.columns.flat_index, classification={"bucketIdx": label, "actValue": label}, learn=True, infer=False) if verbose: print("Done training. System statistics:") print("") print("Encoder", enc.output_sdr.statistics()) print(sp.statistics()) print(tm.statistics()) print(tp.statistics()) print("") # TEST # Make some new words which the system has never seen before. if verbose: random_words = [] for word in dataset: alphabet = [chr(ord('A') + i) for i in range(26)] random_word = ''.join(random.choice(alphabet) for c in word) random_words.append(random_word) print("Novel Words Dataset: " + ', '.join(random_words)) print("") # Measure response to new random words. rand_word_tp_ovlp = 0. n_samples = 0 for word in random_words: reset() response = [] for char in word: compute(char, learn = False) response.append(SDR(tp.columns)) for sdr_a, sdr_b in itertools.combinations(response, 2): rand_word_tp_ovlp += sdr_a.overlap(sdr_b) n_samples += 1 rand_word_tp_ovlp /= n_samples print("Novel Words (Isolated), Average Overlap Within Word %g %%"%(100 * rand_word_tp_ovlp)) # Measure response to new random words, with the stability mechanism # turned off. stability_rate = tp.stability_rate tp.stability_rate = 1. rand_word_tp_ovlp_no_stab = 0. 
for word in random_words: reset() response = [] for char in word: compute(char, learn = False) response.append(SDR(tp.columns)) for sdr_a, sdr_b in itertools.combinations(response, 2): rand_word_tp_ovlp_no_stab += sdr_a.overlap(sdr_b) rand_word_tp_ovlp_no_stab /= n_samples tp.stability_rate = stability_rate print("Novel Words (Isolated), No Stability Mechanism, Avg Ovlp Within Word %g %%"%(100 * rand_word_tp_ovlp_no_stab)) # Compare new word response to that of randomly generated SDRs. rand_sdr_ovlp = 0. tp_n_active = len(tp.columns) for i in range(n_samples): sdr_a = SDR(tp.columns) sdr_b = SDR(tp.columns) sdr_a.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active)) sdr_b.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active)) rand_sdr_ovlp += sdr_a.overlap(sdr_b) rand_sdr_ovlp /= n_samples print("Random Comparable SDR(n=%d sparsity=%g%%), Average Overlap %g %%"%( tp.columns.size, 100 * tp_n_active / tp.columns.size, 100 * rand_sdr_ovlp),) print("") if args.learned_stability: tp.stability_rate = 1 if verbose: print("") print("Disabled Stability Mechanism...") print("") # Measure response to each word in isolation. if verbose: catagories = {word : [] for word in dataset} tm_accuacy = 0. tp_accuacy = 0. n_samples = 0 for word, word_id in dataset.items(): reset() for char in word: compute(char, learn = False) catagories[word].append(SDR(tp.columns)) if not args.disable_tm_sdrc: try: tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0] except IndexError: tm_inference = np.random.random(size=len(dataset)) tm_accuacy += word_id == np.argmax(tm_inference) try: tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0] except IndexError: tp_inference = np.random.random(size=len(dataset)) tp_accuacy += word_id == np.argmax(tp_inference) n_samples += 1 tm_accuacy /= n_samples tp_accuacy /= n_samples print("") print("Isolated Word Stability / Distinctiveness:") stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose) print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples)) print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples)) print("") # Measure response to words in context. Measure the overlap between the # same words in different contexts. Also check the classifier accuracy. catagories = {word : [] for word in dataset} tm_accuacy = 0. tp_accuacy = 0. tm_confusion = np.zeros((len(dataset), len(dataset))) tp_confusion = np.zeros((len(dataset), len(dataset))) n_samples = 0 reset() for word in testing_data: if random.random() < args.typo: mutated_word = mutate_word(word) else: mutated_word = word for char in mutated_word: compute(char, learn = False) if word in catagories: catagories[word].append(SDR(tp.columns)) # Check Classifier Accuracy. 
try: word_id = dataset[word] except KeyError: continue if not args.disable_tm_sdrc: try: tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0] except IndexError: tm_inference = np.random.random(size=len(dataset)) tm_accuacy += word_id == np.argmax(tm_inference) tm_confusion[word_id] += tm_inference / np.sum(tm_inference) try: tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0] except IndexError: tp_inference = np.random.random(size=len(dataset)) tp_accuacy += word_id == np.argmax(tp_inference) tp_confusion[word_id] += tp_inference / np.sum(tp_inference) n_samples += 1 tm_accuacy /= n_samples tp_accuacy /= n_samples if verbose: print("") print("In-Context Word Stability / Distinctiveness:") stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose) if verbose: print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples)) print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples)) score = (stability * tm_accuacy * tp_accuacy) if verbose: print("Score: %g"%score) # Display Confusion Matixes if verbose: conf_matrices = (tm_confusion, tp_confusion,) conf_titles = ('Temporal Memory', 'Temporal Pooler',) # import matplotlib.pyplot as plt plt.figure("Word Recognition Confusion") for subplot_idx, matrix_title in enumerate(zip(conf_matrices, conf_titles)): matrix, title = matrix_title plt.subplot(1, len(conf_matrices), subplot_idx + 1) plt.title(title + " Confusion") matrix /= np.sum(matrix, axis=0) plt.imshow(matrix, interpolation='nearest') plt.xlabel('Prediction') plt.ylabel('Label') for label, idx in dataset.items(): plt.text(idx, len(dataset) + .5, label, rotation='vertical', horizontalalignment='center', verticalalignment='bottom') plt.text(-1.5, idx, label, horizontalalignment='left', verticalalignment='center') # Show a sample of input. if verbose: sentance = [] boundries = [] anomaly_hist = [] stability_hist = [] tp_active_hist = [] tp_class_hist = [] tp_prev_active = SDR(tp.columns.dimensions) n_samples = 0 sample_data = testing_data[ : 100] reset() for word in sample_data: if random.random() < args.typo: mutated_word = mutate_word(word) else: mutated_word = word for index, char in enumerate(mutated_word): compute(char, learn = False) try: tp_inference = np.argmax(tp_sdrc.infer(tp.columns.flat_index, None)[0]) except IndexError: tp_inference = random.choice(range(len(dataset))) tp_class_hist.append(tp_inference) if index == 0: boundries.append(n_samples) sentance.append(char) anomaly_hist.append(tm.anomaly) tp_active_hist.append(SDR(tp.columns)) stability_hist.append(tp.columns.overlap(tp_prev_active)) tp_prev_active = SDR(tp.columns) n_samples += 1 plt.figure("ASCII Stability") stability_weighted = overlap_stability_weighted(tp_active_hist) plt.plot( # np.arange(n_samples)+.5, anomaly_hist, 'ro', # np.arange(n_samples)+.5, stability_hist, 'b-', np.arange(n_samples)+.5, stability_weighted, 'b-',) for idx, char in enumerate(sentance): plt.text(idx + .5, .01, char, horizontalalignment='center') for x in boundries: plt.axvline(x, color='k') sorted_dataset = sorted(dataset) for idx, word_id in enumerate(tp_class_hist): word = sorted_dataset[word_id] plt.text(idx + .5, 1., word, rotation = 90, horizontalalignment = 'center', verticalalignment = 'top',) figure_title = "Output Layer Stability" if args.learned_stability: figure_title += " - Stability Mechanism Disabled." figure_title += "\nInput character at bottom, Classification at top, Vertical lines are word boundries." 
plt.title(figure_title) plt.ylabel('Stability') plt.xlabel('Time step') plt.show() if synapses_debug: sp.synapses.check_data_integrity() tm.synapses.check_data_integrity() tp.synapses.check_data_integrity() print("Synapse data structure integrity is OK.") return score
def main(parameters=default_parameters, argv=None, verbose=True): parser = argparse.ArgumentParser() parser.add_argument('-t', '--time', type=int, default=5, help='Number of times to run through the training data.') parser.add_argument('--dataset', choices=('states', 'dictionary'), default='states') args = parser.parse_args(args = argv) # Load data. if args.dataset == 'states': dataset = state_names if verbose: print("Dataset is %d state names"%len(dataset)) elif args.dataset == 'dictionary': dataset = read_dictionary() dataset = random.sample(dataset, 500) if verbose: print("Dataset is dictionary words, sample size %d"%len(dataset)) dataset = sorted(dataset) word_ids = {word: idx for idx, word in enumerate(sorted(dataset))} confusion = np.zeros((len(dataset), len(dataset))) if verbose: print("Dataset: " + ", ".join('%d) %s'%idx_word for idx_word in enumerate(dataset))) # Construct TM. diagnostics_alpha = parameters['sp']['boosting_alpha'] enc = EnumEncoder(**parameters['enc']) enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha = diagnostics_alpha) sp = SpatialPooler( input_sdr = enc.output_sdr, **parameters['sp']) tm = TemporalMemory( column_sdr = sp.columns, anomaly_alpha = diagnostics_alpha, **parameters['tm']) sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc']) sdrc.compute(-1, [tm.active.size-1], # Initialize the table. classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]}, learn=True, infer=False) def reset(): enc.output_sdr.zero() sp.reset() tm.reset() # Train. if verbose: train_cycles = args.time * sum(len(w) for w in dataset) print("Training for %d cycles (%d dataset iterations)"%(train_cycles, args.time)) for i in range(args.time): random.shuffle(dataset) for word in dataset: reset() for idx, char in enumerate(word): enc.encode(char) sp.compute() tm.compute() lbl = word_ids[word] sdrc.compute(tm.age, tm.learning.flat_index, classification={"bucketIdx": lbl, "actValue": lbl}, learn=True, infer=False) if verbose: print("Encoder", enc.output_sdr.statistics()) print(sp.statistics()) print(tm.statistics()) # Test. score = 0. score_samples = 0 for word in dataset: reset() for idx, char in enumerate(word): enc.encode(char) sp.compute(learn = False) tm.compute(learn = False) inference = sdrc.infer(tm.active.flat_index, None) lbl = word_ids[word] if lbl == np.argmax(inference[0]): score += 1 score_samples += 1 confusion[lbl] += inference[0] print("Score:", 100. * score / score_samples, '%') if synapses_debug: tm.synapses.check_data_integrity() print("Synapse data structure integrity is OK.") if verbose: import matplotlib.pyplot as plt plt.figure('Confusion Matrix') plt.imshow(confusion, interpolation='nearest') plt.xlabel('Prediction') plt.ylabel('Label') plt.show() return score / score_samples
def main(parameters=default_parameters, argv=None, verbose=True): # Setup num_objects = 100 object_sizes = range(20, 40 + 1) train_iterations = 100 test_iterations = 5 steps_per_object = range(3, 17 + 1) inputs, objects = object_dataset(num_objects, object_sizes) enc = EnumEncoder(2400, 0.02) enc.output_sdr = SDR( enc.output_sdr, activation_frequency_alpha=parameters['boosting_alpha'], average_overlap_alpha=parameters['boosting_alpha'], ) sp = StableSpatialPooler(input_sdr=enc.output_sdr, macro_columns=(1, ), **parameters) sdrc = SDRClassifier(steps=[0]) def measure_catagories(): # Compute every sensation for every object. objects_columns = [] for obj in objects: objects_columns.append([]) for sensation in obj: sp.reset() enc.encode(sensation) sp.compute(learn=False) objects_columns[-1].append(SDR(sp.columns)) sp.reset() return objects_columns if verbose: print("Num-Inputs ", len(set(itertools.chain.from_iterable(objects)))) print('Num-Objects ', num_objects) print("Object-Sizes", object_sizes) print("Steps/Object", steps_per_object) print(sp.statistics()) objects_columns = measure_catagories() measure_inter_intra_overlap(objects_columns, verbose) print("") # TRAIN train_time = train_iterations * num_objects * np.mean(steps_per_object) print('TRAINING for ~%d Cycles (%d dataset iterations) ...' % (train_time, train_iterations)) print("") sp.reset() t = 0 for iteration in range(train_iterations): object_order = list(range(num_objects)) random.shuffle(object_order) for object_id in object_order: for step in range(random.choice(steps_per_object)): sensation = random.choice(objects[object_id]) enc.encode(sensation) sp.compute() try: sdrc.compute(t, sp.columns.flat_index, classification={ "bucketIdx": object_id, "actValue": object_id, }, learn=True, infer=False) except ValueError: print("Warning: len(active) = %d." % (len(sp.columns))) t += 1 if verbose: print("TESTING ...") print("") print('Encoder Output', enc.output_sdr.statistics()) print(sp.statistics()) objects_columns = measure_catagories() _, __, stability_metric = measure_inter_intra_overlap( objects_columns, verbose) # Measure classification accuracy. This test consists of looking at every # object a few times and then classifying it. The AI is evaluated on every # cycle. score = 0 max_score = 0 sp.reset() if verbose: print("") print("Test length: %d dataset iterations." % (test_iterations)) test_data = list(range(num_objects)) for iteration in range(test_iterations): random.shuffle(test_data) for object_id in test_data: for step in range(random.choice(steps_per_object)): sensation = random.choice(objects[object_id]) enc.encode(sensation) sp.compute(learn=True) inference = sdrc.infer(sp.columns.flat_index, None)[0] inference = np.argmax(inference) if inference == object_id: score += 1 max_score += 1 if verbose: print('Classification Accuracy: %g %%' % (100 * score / max_score)) if synapses_debug: sp.synapses.check_data_integrity() print("Synapse data structure integrity is OK.") return stability_metric + 10 * (score / max_score)
def _doWriteReadChecks(self, computeBeforeSerializing):
  c1 = SDRClassifier([0], 0.1, 0.1, 0)

  # Create a vector of input bit indices
  input1 = [1, 5, 9]
  if computeBeforeSerializing:
    result = c1.compute(recordNum=0,
                        patternNZ=input1,
                        classification={'bucketIdx': 4, 'actValue': 34.7},
                        learn=True, infer=True)

  proto1 = SdrClassifier_capnp.SdrClassifierProto.new_message()
  c1.write(proto1)

  # Write the proto to a temp file and read it back into a new proto
  with tempfile.TemporaryFile() as f:
    proto1.write(f)
    f.seek(0)
    proto2 = SdrClassifier_capnp.SdrClassifierProto.read(f)

  # Load the deserialized proto
  c2 = SDRClassifier.read(proto2)

  self.assertEqual(c1.steps, c2.steps)
  self.assertEqual(c1._maxSteps, c2._maxSteps)
  self.assertAlmostEqual(c1.alpha, c2.alpha)
  self.assertAlmostEqual(c1.actValueAlpha, c2.actValueAlpha)
  self.assertEqual(c1._patternNZHistory, c2._patternNZHistory)
  self.assertEqual(list(c1._weightMatrix.keys()),
                   list(c2._weightMatrix.keys()))
  for step in list(c1._weightMatrix.keys()):
    c1Weight = c1._weightMatrix[step]
    c2Weight = c2._weightMatrix[step]
    self.assertSequenceEqual(list(c1Weight.flatten()),
                             list(c2Weight.flatten()))
  self.assertEqual(c1._maxBucketIdx, c2._maxBucketIdx)
  self.assertEqual(c1._maxInputIdx, c2._maxInputIdx)
  self.assertEqual(len(c1._actualValues), len(c2._actualValues))
  for i in range(len(c1._actualValues)):
    self.assertAlmostEqual(c1._actualValues[i], c2._actualValues[i], 5)
  self.assertEqual(c1._version, c2._version)
  self.assertEqual(c1.verbosity, c2.verbosity)

  # NOTE: the previous step's actual values determine the size of lists in
  # results
  expectedActualValuesLen = len(c1._actualValues)

  result1 = c1.compute(recordNum=1,
                       patternNZ=input1,
                       classification={'bucketIdx': 4, 'actValue': 34.7},
                       learn=True, infer=True)
  result2 = c2.compute(recordNum=1,
                       patternNZ=input1,
                       classification={'bucketIdx': 4, 'actValue': 34.7},
                       learn=True, infer=True)

  self.assertEqual(list(result1.keys()), list(result2.keys()))
  for key in list(result1.keys()):
    self.assertEqual(len(result1[key]), len(result2[key]))
    self.assertEqual(len(result1[key]), expectedActualValuesLen)
    for i in range(expectedActualValuesLen):
      self.assertAlmostEqual(result1[key][i], result2[key][i], 5)