def chomp(self):
    """
    Computes pattern statistics on the sampled spikes: counts the raw
    patterns, counts the patterns under the dynamics of the Hopfield
    network learned on the samples, and stores the spikes obtained by
    applying these dynamics to the samples.

    Returns
    -------
    Nothing
    """
    hdlog.debug("Chomping samples from model")
    self._raw_patterns = PatternsRaw(save_sequence=True)
    self._raw_patterns.chomp_spikes(spikes=self._sample_spikes)
    hdlog.info("Raw: %d-bit, %d patterns" % (
        self._sample_spikes.N, len(self._raw_patterns)))

    hdlog.debug("Chomping dynamics (from network learned on the samples) applied to samples")
    self._hopfield_patterns = PatternsHopfield(learner=self._learner, save_sequence=True)
    self._hopfield_patterns.chomp_spikes(spikes=self._sample_spikes)
    hdlog.info("Hopfield: %d-bit, %d patterns" % (
        self._sample_spikes.N, len(self._hopfield_patterns)))

    # samples after applying the learned dynamics
    self._hopfield_spikes = self._hopfield_patterns.apply_dynamics(
        spikes=self._sample_spikes, reshape=True)
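# Hypothetical usage sketch (the surrounding model object and its fit()
# method are assumed from context, as in distinct_patterns_over_windows
# below): after fitting, chomp() tallies raw and Hopfield-converged
# patterns on the sampled spikes so the two counts can be compared.
#
#   model.fit(trials=[0])
#   model.chomp()
#   hdlog.info("%d raw vs %d Hopfield patterns" %
#              (len(model._raw_patterns), len(model._hopfield_patterns)))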
def learn_from_binary(self, X, remove_zeros=False, disp=False):
    """
    Trains on an M x N matrix X of M binary vectors of length N.

    Parameters
    ----------
    X : numpy array
        (M, N)-dim array of binary input patterns of length N,
        where N is the number of nodes in the network
    remove_zeros : bool, optional
        Flag whether to remove vectors from X in which all entries
        are 0 (default False)
    disp : bool, optional
        Display scipy L-BFGS-B output (default False)

    Returns
    -------
    Nothing
    """
    self.network = HopfieldNetMPF(len(X[0]))
    if remove_zeros:
        X_ = X[X.mean(axis=1) != 0., :]  # remove all-zero rows
        hdlog.info("Learning %d %d-bit (nonzero) binary patterns, sparsity %.04f..." % (
            X_.shape[0], X_.shape[1], X_.mean()))
    else:
        X_ = X
        hdlog.info("Learning %d %d-bit binary patterns, sparsity %.04f..." % (
            X_.shape[0], X_.shape[1], X_.mean()))
    self.network.learn_all(X_, disp=disp)
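# Minimal usage sketch, assuming a Learner-like object exposing
# learn_from_binary as defined above and numpy available as np:
#
#   X = (np.random.random((50, 16)) < 0.3).astype(int)  # 50 random 16-bit patterns
#   learner.learn_from_binary(X, remove_zeros=True)     # fit MPF Hopfield network
#   J = learner.network.J                               # learned coupling matrix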
def test_patterns_hopfield(self):
    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/tiny_spikes.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    learner = Learner(spikes)
    learner.learn_from_spikes(spikes)
    patterns = PatternsHopfield(learner=learner)
    patterns.chomp_spikes(spikes)
    self.assertEqual(len(patterns), 3)

    patterns.save(os.path.join(self.TMP_PATH, 'patterns'))
    patterns2 = PatternsHopfield.load(os.path.join(self.TMP_PATH, 'patterns'))
    self.assertTrue(isinstance(patterns2, PatternsHopfield))
    self.assertEqual(len(patterns2), 3)
    self.assertEqual(len(patterns2.mtas), 3)
    self.assertEqual(len(patterns2.mtas_raw), 3)

    learner.learn_from_spikes(spikes, window_size=3)
    patterns = PatternsHopfield(learner=learner)
    patterns.chomp_spikes(spikes, window_size=3)
    self.assertEqual(len(patterns), 4)

    spikes_arr1 = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 0]])
    spikes = Spikes(spikes=spikes_arr1)
    learner = Learner(spikes)
    learner.learn_from_spikes(spikes)

    # test recording of fixed-point sequences
    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    learner = Learner(spikes)
    learner.learn_from_spikes(spikes)
    patterns = PatternsHopfield(learner, save_sequence=True)
    patterns.chomp_spikes(spikes)
    self.assertEqual(patterns._sequence, [0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1])

    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    learner = Learner(spikes)
    learner.learn_from_spikes(spikes, window_size=2)
    patterns = PatternsHopfield(learner, save_sequence=True)
    patterns.chomp_spikes(spikes, window_size=2)
    self.assertEqual(patterns._sequence, [0, 1, 2, 3, 0, 1, 4, 5, 6, 5, 7, 3])
    hdlog.info(spikes._spikes)
    hdlog.info(patterns.pattern_to_trial_raster(3))
def test_patterns_raw(self):
    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/tiny_spikes.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    hdlog.info(spikes._spikes)
    patterns = PatternsRaw()
    patterns.chomp_spikes(spikes)
    hdlog.info(patterns._counts)
    self.assertEqual(len(patterns), 4)

    patterns = PatternsRaw()
    patterns.chomp_spikes(spikes, window_size=3)
    self.assertEqual(len(patterns), 4)

    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    patterns = PatternsRaw()
    patterns.chomp_spikes(spikes, window_size=3)
    self.assertEqual(len(patterns), 9)

    patterns.save(os.path.join(self.TMP_PATH, 'raw'))
    patterns2 = PatternsRaw.load(os.path.join(self.TMP_PATH, 'raw'))
    self.assertTrue(isinstance(patterns2, PatternsRaw))
    self.assertEqual(len(patterns), len(patterns2))
def find_subsequences(self, thresholds, sequence=None):
    """
    Enumerates all subsequences of lengths up to `len(thresholds)` in
    `sequence` (if sequence is `None` the possibly filtered sequence
    from the stored counter object is taken). Subsequences of length i
    are only considered if they appear at least `thresholds[i - 1]`
    times in the sequence.

    Parameters
    ----------
    thresholds : list of int
        List of threshold values
    sequence : list or numpy array, optional
        Sequence to consider, if `None` defaults to stored sequence
        (default `None`)

    Returns
    -------
    sequences : list of dicts
        List of dictionaries containing all found sequences as keys
        and counts as values. Keys are memory labels separated by ','.
    """
    if sequence is None:
        sequence = self.sequence
    import collections
    # count single occurrences over the (possibly user-supplied) sequence
    counts = {str(item[0]): item[1]
              for item in collections.Counter(sequence).items()}
    all_counts = []
    maxlen = len(thresholds)
    for l in range(2, maxlen + 2):
        # drop subsequences below the threshold for the previous length
        for k in list(counts.keys()):
            if counts[k] < thresholds[l - 2]:
                del counts[k]
        all_counts.append(counts)
        if l == maxlen + 1:
            break
        hdlog.info('processing subsequences of length %d' % l)
        counts_new = {}
        for s in SequenceAnalyzer.subseqs(sequence, l):
            subkey = ','.join([str(x) for x in s[:l - 1]])
            if subkey not in counts:
                continue
            key = subkey + ',' + str(s[l - 1])
            if key not in counts_new:
                counts_new[key] = 1
            else:
                counts_new[key] += 1
        counts = counts_new
    return all_counts
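# Minimal usage sketch on toy data (the SequenceAnalyzer constructor and
# subseqs yielding sliding windows are assumptions about the surrounding
# API): find subsequences up to length 3, keeping length-1 items seen at
# least 2 times, length-2 at least 2 times, length-3 at least once.
#
#   sa = SequenceAnalyzer(counter)
#   seq = [0, 1, 2, 0, 1, 2, 0, 1]
#   all_counts = sa.find_subsequences([2, 2, 1], sequence=seq)
#   # all_counts[1] then contains e.g. '0,1' -> 3, '1,2' -> 2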
def test_saving(self):
    spikes = Spikes(spikes=np.array([[1, 1, 1, 0, 1, 0],
                                     [1, 1, 1, 1, 1, 1],
                                     [0, 0, 0, 1, 0, 0]]))
    hdlog.info(spikes.spikes)
    spikes.save(os.path.join(self.TMP_PATH, 'spikes'))
    spikes2 = Spikes.load(os.path.join(self.TMP_PATH, 'spikes'))
    hdlog.info(spikes2.spikes)
    self.assertTrue((spikes.spikes == spikes2.spikes).all())
def test_counter(self):
    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/tiny_spikes.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    hdlog.info(spikes._spikes)
    counter = Counter()
    counter.chomp_spikes(spikes)
    hdlog.info(counter._counts)
    self.assertEqual(len(counter), 4)

    counter = Counter()
    counter.chomp_spikes(spikes, window_size=3)
    self.assertEqual(len(counter), 4)

    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    counter = Counter()
    counter.chomp_spikes(spikes, window_size=3)
    self.assertEqual(len(counter), 9)

    counter.save(os.path.join(self.TMP_PATH, 'counter'))
    counter2 = Counter.load(os.path.join(self.TMP_PATH, 'counter'))
    self.assertTrue(isinstance(counter2, Counter))
    self.assertEqual(len(counter), len(counter2))

    spikes_arr1 = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 0]])
    spikes = Spikes(spikes=spikes_arr1)
    counter1 = Counter()
    counter1.chomp_spikes(spikes)
    counter2 = Counter()
    counter2.chomp_spikes(spikes)
    counter2.merge_counts(counter1)
    self.assertEqual(sum(counter2._counts.values()), 6)

    counter3 = counter2 + counter1
    self.assertEqual(counter3, counter2)
    self.assertEqual(sum(counter3.counts.values()), 9)

    spikes_arr2 = np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])
    spikes = Spikes(spikes=spikes_arr2)
    counter4 = Counter().chomp_spikes(spikes).merge_counts(counter3)
    self.assertEqual(len(counter4.counts.keys()), 5)
    self.assertEqual(len(counter4.patterns), 5)

    file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
    spikes = Spikes(file_contents[file_contents.keys()[0]])
    counter = Counter(save_sequence=True)
    counter.chomp_spikes(spikes)
    self.assertEqual(counter._sequence, [0, 1, 0, 2, 3, 4, 1, 5, 4, 6, 2, 0, 6, 2, 2])

    np.random.seed(42)
    spikes_arr = (np.random.randn(5, 10000) < .05).astype(np.int)
    spikes = Spikes(spikes=spikes_arr)
    empirical = PatternsRaw()
    empirical.chomp_spikes(spikes)
    empirical_w2 = PatternsRaw()
    empirical_w2.chomp_spikes(spikes, window_size=2)
    self.assertTrue(np.abs(empirical_w2.entropy() - 2 * empirical.entropy()) < .1)
def __init__(self, stimulus_arr=None, npz_file=None, h5_file=None, preprocess=True):
    """
    Initializes a stimulus object from an in-memory array, an .npz
    file or an HDF5 file (the first array found in a given file is
    used as stimulus data).

    Parameters
    ----------
    stimulus_arr : numpy array, optional
        Array of stimulus data, one frame per row (default None)
    npz_file : str, optional
        Name of .npz file to load stimulus data from (default None)
    h5_file : str, optional
        Name of HDF5 file to load stimulus data from (default None)
    preprocess : bool, optional
        Flag whether to preprocess the stimulus data after loading
        (default True)
    """
    object.__init__(self)
    Restoreable.__init__(self)

    # TODO reuse io functionality from data module!
    self.file_name = npz_file or ''
    if npz_file is None and stimulus_arr is None and h5_file is None:
        self._M = 0
        return

    if stimulus_arr is not None:
        self._stimulus_arr = stimulus_arr

    if npz_file is not None:
        if not os.path.isfile(npz_file):
            hdlog.info("File '%s' does not exist!" % npz_file)
            return
        self.file_name = npz_file
        tmp = np.load(npz_file)
        self._stimulus_arr = tmp[tmp.keys()[0]]

    if h5_file is not None:
        import h5py
        f = h5py.File(h5_file)
        self._stimulus_arr = f[f.keys()[0]]

    if preprocess:
        self.preprocess()

    self._M = self._stimulus_arr.shape[0]
    self._X = self._stimulus_arr.shape[1:]
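# Minimal usage sketch, assuming this is the constructor of a Stimulus
# class (the file name is hypothetical): a stimulus can be built directly
# from an array or loaded from disk; _M holds the number of frames and
# _X the per-frame shape.
#
#   stim = Stimulus(stimulus_arr=np.zeros((100, 16, 16)))
#   stim = Stimulus(npz_file='stimulus_frames.npz', preprocess=False)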
def test_learning(self):
    # OPR learning (Hopfield original rule)
    np.random.seed(42)
    N = 300
    M = int(N / (4 * np.log(N)))  # theoretical max for OPR [McEliece et al, 87]
    t = now()
    OPR = HopfieldNet(N)
    data = (np.random.random((M, N)) < .5).astype('int')
    OPR.learn_all(data)
    recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
    hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
    self.assertTrue(recall > .8)

    # beyond the OPR capacity limit recall degrades ...
    M = 50
    OPR = HopfieldNet(N)
    data = (np.random.random((M, N)) < .5).astype('int')
    OPR.learn_all(data)
    recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
    hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
    self.assertTrue(recall < .5)

    # ... while MPF still achieves perfect recall
    MPF = HopfieldNetMPF(N)
    MPF.learn_all(data)
    recall = MPF.exact_recalled(data)
    hdlog.info("MPF Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
    self.assertEqual(recall, 1)

    # store 90 memories in 64-bit neurons
    N = 64
    M = 90
    t = now()
    MPF = HopfieldNetMPF(N)
    data = (np.random.random((M, N)) < .5).astype('int')
    MPF.learn_all(data)
    recall = MPF.exact_recalled(data)
    hdlog.info("MPF Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
    self.assertEqual(recall, 1)

    OPR = HopfieldNet(N)
    OPR.learn_all(data)
    recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
    hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
    self.assertTrue(recall < .01)
def load_legacy(cls, file_name='counter'):
    # internal method to load legacy .npz file format
    base, ext = os.path.splitext(file_name)
    if not ext:
        ext = ".npz"
    file_name = base + ext
    hdlog.info("Loading Counter patterns from legacy file '%s'" % file_name)
    instance = cls()
    contents = np.load(file_name)
    instance._counts = dict(zip(contents['count_keys'], contents['count_values']))
    instance._patterns = contents['fp_list']
    instance._lookup_patterns = dict(zip(contents['lookup_fp_keys'], contents['lookup_fp_values']))
    instance._sequence = contents['sequence']
    contents.close()
    return instance
def load_legacy(cls, file_name='patterns_hopfield'):
    # internal method to load legacy .npz file format
    base, ext = os.path.splitext(file_name)
    if not ext:
        ext = ".npz"
    file_name = base + ext
    hdlog.info("Loading PatternsHopfield patterns from legacy file '%s'" % file_name)
    instance = cls()
    contents = np.load(file_name)
    instance._counts = dict(zip(contents['count_keys'], contents['count_values']))
    instance._patterns = contents['fp_list']
    instance._lookup_patterns = dict(zip(contents['lookup_fp_keys'], contents['lookup_fp_values']))
    instance._mtas = dict(zip(contents['stas_keys'], contents['stas_values']))
    instance._sequence = contents['sequence']
    contents.close()
    return instance
def distinct_patterns_over_windows(self, window_sizes=None, trials=None, save_couplings=False,
                                   remove_zeros=False):
    """
    Fits the model and counts distinct patterns for each trial and
    window size. Returns a tuple: counts, entropies [, couplings].

    counts, entropies: arrays of size 2 x T x len(window_sizes)
    (0: empirical from model sample, 1: dynamics from learned model
    on sample)

    Parameters
    ----------
    window_sizes : list of int, optional
        Window sizes to consider (default None, i.e. [1])
    trials : list of int, optional
        Trials to consider (default None, i.e. all trials)
    save_couplings : bool, optional
        Flag whether to save the learned coupling matrices J for each
        window size and trial (default False)
    remove_zeros : bool, optional
        Flag whether to remove all-zero patterns when fitting
        (default False)

    Returns
    -------
    counts, entropies [, couplings] : numpy arrays [, dict]
        Pattern counts and entropies; couplings (keyed by window size)
        only returned if save_couplings is True
    """
    if window_sizes is None:
        window_sizes = [1]
    trials = trials or range(self._original_spikes.T)
    counts = np.zeros((2, len(trials), len(window_sizes)))
    entropies = np.zeros((2, len(trials), len(window_sizes)))
    couplings = {}
    tot_learn_time = 0

    for ws, window_size in enumerate(window_sizes):
        couplings[window_size] = []
        for c, trial in enumerate(trials):
            hdlog.info("Trial %d | ws %d" % (trial, window_size))

            self._window_size = window_size

            t = now()
            self.fit(trials=[trial], remove_zeros=remove_zeros)
            diff = now() - t
            hdlog.info("[%1.3f min]" % (diff / 60.))
            tot_learn_time += diff

            if save_couplings:
                couplings[window_size].append(self._learner.network.J.copy())

            self.chomp()

            entropies[0, c, ws] = self._raw_patterns.entropy()
            counts[0, c, ws] = len(self._raw_patterns)
            entropies[1, c, ws] = self._hopfield_patterns.entropy()
            counts[1, c, ws] = len(self._hopfield_patterns)

    hdlog.info("Total learn time: %1.3f mins" % (tot_learn_time / 60.))
    self._learn_time = tot_learn_time

    if save_couplings:
        return counts, entropies, couplings
    return counts, entropies
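# Minimal usage sketch (the model object is an assumption about the
# surrounding class): compare the number of distinct raw patterns with
# the number of distinct Hopfield memories across window sizes.
#
#   counts, entropies = model.distinct_patterns_over_windows(window_sizes=[1, 2, 3])
#   raw_counts = counts[0]       # empirical patterns per trial / window size
#   hopfield_counts = counts[1]  # converged patterns under learned dynamics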
def read_spikes(path_or_files, rate, first_cluster=2, filter_silent=True, return_status=False):
    """
    Reader for `KlustaKwik <https://github.com/klusta-team/klustakwik>`_ files.

    Parameters
    ----------
    path_or_files : string or list
        path of data set or list of \*.res.\* files to load
    rate : float
        sampling rate [in Hz]
    first_cluster : integer, optional
        discard the first n clusters, commonly used for unclassified
        spikes (default 2)
    filter_silent : boolean, optional
        filter out clusters that have no spikes (default True)
    return_status : boolean, optional
        if True returns a status dictionary along with data as second
        return value (default False)

    Returns
    -------
    spike_times : list of numpy arrays
        spike times of all clusters. Float values represent spike
        times in seconds (i.e. a value of 1.0 represents a spike at
        time 1s)
    """
    if isinstance(path_or_files, (str, unicode)):
        # glob all res files
        hdlog.info('Loading KlustaKwik data from %s' % os.path.abspath(path_or_files))
        import glob
        res_files = glob.glob(os.path.join(path_or_files, '*.res.*'))
    else:
        res_files = path_or_files
        hdlog.info('Loading KlustaKwik data from files %s' % str(path_or_files))

    hdlog.info('Processing %d electrode files' % len(res_files))

    spike_times = []
    num_clusters = 0
    num_spikes = 0
    t_min = np.inf
    t_max = -np.inf
    cells_filtered = 0
    electrodes = []
    for fn_res in res_files:
        hdlog.debug('Processing electrode file "%s"..' % fn_res)
        electrodes.append(int(fn_res[fn_res.rindex('.') + 1:]))

        fn_clu = fn_res.replace('.res.', '.clu.')
        if not os.path.exists(fn_clu):
            raise Exception('Cluster file "%s" not found!' % fn_clu)

        # load time stamps
        times = np.loadtxt(fn_res) * (1. / float(rate))

        # load cluster data
        clusters = np.loadtxt(fn_clu).astype(int)
        n_clusters = clusters[0]
        cluster_seq = clusters[1:]
        if cluster_seq.shape[0] != times.shape[0]:
            raise Exception('Data inconsistent for files %s, %s: lengths differ!' % (fn_res, fn_clu))

        hdlog.debug('%d clusters, %d spikes' % (n_clusters, cluster_seq.shape[0]))

        spike_times_electrode = [times[np.where(cluster_seq == c)[0]]
                                 for c in range(first_cluster, n_clusters)]
        if filter_silent:
            c_orig = len(spike_times_electrode)
            spike_times_electrode = [x for x in spike_times_electrode if len(x) > 0]
            cells_filtered += c_orig - len(spike_times_electrode)

        spike_times.extend(spike_times_electrode)
        num_clusters += n_clusters - first_cluster
        num_spikes += sum(map(len, spike_times_electrode))
        t_min = min(t_min, min(times))
        t_max = max(t_max, max(times))

    status = {
        'clusters': num_clusters,
        'discarded_clusters': first_cluster * len(res_files),
        'filtered': cells_filtered,
        't_min': t_min,
        't_max': t_max,
        'num_spikes': num_spikes,
        'electrodes': electrodes
    }
    hdlog.info('Processed %d clusters (%d discarded), %d cells (%d silent discarded), '
               '%d spikes total, t_min=%f s, t_max=%f s, delta=%f s' %
               (num_clusters, first_cluster * len(res_files), num_clusters - cells_filtered,
                cells_filtered, num_spikes, t_min, t_max, t_max - t_min))

    if return_status:
        return spike_times, status
    else:
        return spike_times
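# Minimal usage sketch (directory name and sampling rate are
# hypothetical): load spike times recorded at 20 kHz, then bin them
# with bin_spike_times below.
#
#   spike_times = read_spikes('/data/session1', rate=20000.)
#   spikes = bin_spike_times(spike_times, bin_size=0.001)  # 1 ms bins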
def get_spike_sequence(spike_times, cells=None, t_min=None, t_max=None):
    """
    Extracts the firing sequence from the given spike times, i.e. a
    binary matrix S of dimension N x M where N is the number of
    neurons and M the total number of spikes in the data set. Each
    column of S contains exactly one non-zero entry, at the row of
    the cell that spiked. Absolute spike timing information is
    discarded, spike order is preserved.

    Takes optional arguments cells, t_min and t_max that can be used
    to restrict the cell indices (defaults to all cells) and time
    range (default t_min = minimum of all spike times in spike_times,
    default t_max = maximum of all spike times in spike_times).

    Parameters
    ----------
    spike_times : array_like
        2d array of spike times of cells
    cells : array_like, optional
        indices of cells to process (default None, i.e. all cells)
    t_min : float, optional
        time of leftmost bin (default None)
    t_max : float, optional
        time of rightmost bin (default None)

    Returns
    -------
    sequence : 2d numpy array of int
        Spike sequence matrix S
    """
    t_min_dat = np.inf
    t_max_dat = -np.inf
    spike_times = np.atleast_2d(spike_times)
    if cells is None:
        cells = np.array(range(len(spike_times)))
    spike_times_nonempty = [x for x in spike_times[cells] if len(x) > 0]
    if len(spike_times_nonempty) > 0:
        t_min_dat = min([t_min_dat] + map(min, spike_times_nonempty))
        t_max_dat = max([t_max_dat] + map(max, spike_times_nonempty))
    if t_min is None:
        t_min = t_min_dat
    if t_max is None:
        t_max = t_max_dat
    if t_min == np.inf or t_max == -np.inf:
        hdlog.info('No spikes!')
        return np.zeros((len(spike_times), 1))

    num_cells = len(cells)
    num_spikes = sum(map(len, spike_times_nonempty))
    sequence = np.zeros((num_cells, num_spikes), dtype=int)
    hdlog.info('Extracting sequences for {c} cells between t_min={m} and t_max={M}, {s} spikes'.format(
        c=num_cells, m=t_min, M=t_max, s=num_spikes))

    times = np.array([s for c in spike_times_nonempty for s in c])
    sort_idx = np.argsort(times)
    idxs = np.array([i for i, c in enumerate(spike_times_nonempty) for _ in c])
    # column i marks the cell that fired the i-th spike in time order
    # (note the row index must be the cell, the column index the spike)
    for i, c in enumerate(idxs[sort_idx]):
        sequence[c, i] = 1
    return sequence
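# Tiny worked example: two cells with two spikes each, cell 0 at 0.1 s
# and 0.3 s, cell 1 at 0.2 s and 0.4 s. In time order the spikes come
# from cells 0, 1, 0, 1, so S has one 1 per column at those rows:
#
#   S = get_spike_sequence([[0.1, 0.3], [0.2, 0.4]])
#   # S == [[1, 0, 1, 0],
#   #       [0, 1, 0, 1]]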
def bin_spike_times(spike_times, bin_size, cells=None, t_min=None, t_max=None):
    """
    Bins given spike_times into bins of size bin_size. Spike times
    are expected in seconds (i.e. 1.0 for a spike at second 1, 0.5
    for a spike at 500 ms).

    Takes optional arguments cells, t_min and t_max that can be used
    to restrict the cell indices (defaults to all cells) and time
    range (default t_min = minimum of all spike times in spike_times,
    default t_max = maximum of all spike times in spike_times).

    Parameters
    ----------
    spike_times : 2d numpy array
        2d array of spike times of cells, cells as rows
    bin_size : float
        bin size to be used for binning (1ms = 0.001)
    cells : array_like, optional
        indices of cells to process (default None, i.e. all cells)
    t_min : float, optional
        time of leftmost bin (default None)
    t_max : float, optional
        time of rightmost bin (default None)

    Returns
    -------
    spikes : :class:`.Spikes`
        Spikes class containing binned spikes.
    """
    t_min_dat = np.inf
    t_max_dat = -np.inf
    spike_times = np.atleast_1d(spike_times)
    if cells is None:
        cells = np.array(range(len(spike_times)))
    spike_times_nonempty = [x for x in spike_times[cells] if len(x) > 0]
    if len(spike_times_nonempty) > 0:
        t_min_dat = min([t_min_dat] + map(min, spike_times_nonempty))
        t_max_dat = max([t_max_dat] + map(max, spike_times_nonempty))
    if t_min is None:
        t_min = t_min_dat
    if t_max is None:
        t_max = t_max_dat
    if t_min == np.inf or t_max == -np.inf:
        hdlog.info('No spikes!')
        # keep the declared return type even when there are no spikes
        return Spikes(spikes=np.zeros((len(spike_times), 1)))

    bins = np.arange(t_min, t_max + bin_size, bin_size)
    binned = np.zeros((len(spike_times[cells]), len(bins)), dtype=int)
    hdlog.info('Binning {c} cells between t_min={m} and t_max={M}, {bins} bins'.format(
        c=binned.shape[0], m=t_min, M=t_max, bins=len(bins)))
    pos = 0
    for st in spike_times[cells]:  # restrict to the selected cells
        if len(st) > 0:
            indices = np.digitize(st, bins) - 1
            binned[pos, indices] = 1
        pos += 1
    return Spikes(spikes=binned)
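# Minimal usage sketch: bin two cells' spike times into 100 ms bins;
# the result is a Spikes object whose array has one row per cell.
#
#   spikes = bin_spike_times([[0.05, 0.42], [0.13]], bin_size=0.1)
#   spikes.spikes  # binary matrix, cells x bins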