Example #1
    def chomp(self):
        """
        Missing documentation
        
        Returns
        -------
        Value : Type
            Description
        """
        hdlog.debug("Chomping samples from model")
        self._raw_patterns = PatternsRaw(save_sequence=True)
        self._raw_patterns.chomp_spikes(spikes=self._sample_spikes)
        hdlog.info("Raw: %d-bit, %d patterns" % (
            self._sample_spikes.N, len(self._raw_patterns)))

        hdlog.debug("Chomping dynamics (from network learned on the samples) applied to samples")
        self._hopfield_patterns = PatternsHopfield(learner=self._learner, save_sequence=True)
        self._hopfield_patterns.chomp_spikes(spikes=self._sample_spikes)
        hdlog.info("Hopfield: %d-bit, %d patterns" % (
            self._sample_spikes.N, len(self._hopfield_patterns)))

        # print "Before dynamics:"
        # print self.sample_spikes.spikes
        # print "Applied dynamics:"
        self._hopfield_spikes = self._hopfield_patterns.apply_dynamics(spikes=self._sample_spikes, reshape=True)
Example #2
    def learn_from_binary(self, X, remove_zeros=False, disp=False):
        """
        Trains the network on an M x N matrix X of M binary vectors of length N.

        Parameters
        ----------
        X : numpy array
            (M, N)-dim array of binary input patterns of length N,
            where N is the number of nodes in the network
        remove_zeros : bool, optional
            Flag whether to remove vectors from X in which
            all entries are 0 (default False)
        disp : bool, optional
            Display scipy L-BFGS-B output (default False)

        Returns
        -------
        Nothing
        """
        self.network = HopfieldNetMPF(len(X[0]))
        if remove_zeros:
            X_ = X[X.mean(axis=1) != 0., :]  # remove all zeros
            hdlog.info(
                "Learning %d %d-bit (nonzero) binary patterns, sparsity %.04f..."
                % (X_.shape[0], X_.shape[1], X_.mean()))
        else:
            X_ = X
            hdlog.info(
                "Learning %d %d-bit binary patterns, sparsity %.04f..." %
                (X_.shape[0], X_.shape[1], X_.mean()))
        self.network.learn_all(X_, disp=disp)
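
A minimal usage sketch for the method above, assuming (as in the other examples) that it lives on hdnet's Learner class; the import paths are assumptions and should be treated as placeholders if the package layout differs.

import numpy as np
from hdnet.spikes import Spikes     # assumed import path
from hdnet.learner import Learner   # assumed import path

np.random.seed(0)
X = (np.random.random((20, 16)) < .5).astype(int)  # 20 binary patterns over 16 nodes

learner = Learner(Spikes(spikes=X.T))  # Learner is constructed from a Spikes object in the tests above
learner.learn_from_binary(X, remove_zeros=True, disp=False)
J = learner.network.J  # coupling matrix of the fitted HopfieldNetMPF
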
Example #3
    def learn_from_binary(self, X, remove_zeros=False, disp=False):
        """
        Trains the network on an M x N matrix X of M binary vectors of length N.

        Parameters
        ----------
        X : numpy array
            (M, N)-dim array of binary input patterns of length N,
            where N is the number of nodes in the network
        remove_zeros : bool, optional
            Flag whether to remove vectors from X in which
            all entries are 0 (default False)
        disp : bool, optional
            Display scipy L-BFGS-B output (default False)

        Returns
        -------
        Nothing
        """
        self.network = HopfieldNetMPF(len(X[0]))
        if remove_zeros:
            X_ = X[X.mean(axis=1) != 0., :]  # remove all zeros
            hdlog.info("Learning %d %d-bit (nonzero) binary patterns, sparsity %.04f..." % (
                X_.shape[0], X_.shape[1], X_.mean()))
        else:
            X_ = X
            hdlog.info("Learning %d %d-bit binary patterns, sparsity %.04f..." % (X_.shape[0], X_.shape[1], X_.mean()))
        self.network.learn_all(X_, disp=disp)
Example #4
    def test_patterns_hopfield(self):
        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/tiny_spikes.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        learner = Learner(spikes)
        learner.learn_from_spikes(spikes)

        patterns = PatternsHopfield(learner=learner)
        patterns.chomp_spikes(spikes)
        # print spikes.spikes
        self.assertEqual(len(patterns), 3)
        # print "%d fixed-points (entropy H = %1.3f):" % (len(patterns), patterns.entropy())
        # print map(patterns.pattern_for_key, patterns.counts.keys())

        patterns.save(os.path.join(self.TMP_PATH, 'patterns'))
        patterns2 = PatternsHopfield.load(os.path.join(self.TMP_PATH, 'patterns'))
        self.assertTrue(isinstance(patterns2, PatternsHopfield))
        self.assertEqual(len(patterns2), 3)
        self.assertEqual(len(patterns2.mtas), 3)
        self.assertEqual(len(patterns2.mtas_raw), 3)

        learner.learn_from_spikes(spikes, window_size=3)
        patterns = PatternsHopfield(learner=learner)
        patterns.chomp_spikes(spikes, window_size=3)
        # print spikes.spikes
        
        # print patterns.counts
        self.assertEqual(len(patterns), 4)
        # print "%d fixed-points (entropy H = %1.3f):" % (len(patterns), patterns.entropy())
        # for x in patterns.list_patterns(): print x

        spikes_arr1 = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 0]])
        spikes = Spikes(spikes=spikes_arr1)
        learner = Learner(spikes)
        learner.learn_from_spikes(spikes)

        # test recording fixed-points
        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        learner = Learner(spikes)
        learner.learn_from_spikes(spikes)
        patterns = PatternsHopfield(learner, save_sequence=True)
        patterns.chomp_spikes(spikes)
        self.assertEqual(patterns._sequence, [0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1])

        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        learner = Learner(spikes)
        learner.learn_from_spikes(spikes, window_size=2)
        patterns = PatternsHopfield(learner, save_sequence=True)
        patterns.chomp_spikes(spikes, window_size=2)
        # print patterns.mtas
        # print patterns.sequence
        # for x in patterns.list_patterns(): print x
        # print spikes.spikes
        self.assertEqual(patterns._sequence, [0, 1, 2, 3, 0, 1, 4, 5, 6, 5, 7, 3])
        # self.assertTrue(np.mean(patterns.pattern_to_binary_matrix(1) == [[0, 0], [0, 1], [1, 0]]))
        # self.assertTrue(np.mean(patterns.pattern_to_mta_matrix(1) == [[0, 0], [0, 1], [1, .5]]))
        
        hdlog.info(spikes._spikes)
        hdlog.info(patterns.pattern_to_trial_raster(3))
Example #5
    def test_patterns_raw(self):
        file_contents = np.load(
            os.path.join(os.path.dirname(__file__),
                         'test_data/tiny_spikes.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        hdlog.info(spikes._spikes)
        patterns = PatternsRaw()
        patterns.chomp_spikes(spikes)
        hdlog.info(patterns._counts)
        self.assertEqual(len(patterns), 3)

        patterns = PatternsRaw()
        patterns.chomp_spikes(spikes, window_size=3)
        self.assertEqual(len(patterns), 4)

        file_contents = np.load(
            os.path.join(os.path.dirname(__file__),
                         'test_data/spikes_trials.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        patterns = PatternsRaw()
        patterns.chomp_spikes(spikes, window_size=3)
        self.assertEqual(len(patterns), 9)

        patterns.save(os.path.join(self.TMP_PATH, 'raw'))
        patterns2 = PatternsRaw.load(os.path.join(self.TMP_PATH, 'raw'))
        self.assertTrue(isinstance(patterns2, PatternsRaw))
        self.assertEqual(len(patterns), len(patterns2))
Example #6
    def chomp(self):
        """
        Missing documentation
        
        Returns
        -------
        Value : Type
            Description
        """
        hdlog.debug("Chomping samples from model")
        self._raw_patterns = PatternsRaw(save_sequence=True)
        self._raw_patterns.chomp_spikes(spikes=self._sample_spikes)
        hdlog.info("Raw: %d-bit, %d patterns" %
                   (self._sample_spikes.N, len(self._raw_patterns)))

        hdlog.debug(
            "Chomping dynamics (from network learned on the samples) applied to samples"
        )
        self._hopfield_patterns = PatternsHopfield(learner=self._learner,
                                                   save_sequence=True)
        self._hopfield_patterns.chomp_spikes(spikes=self._sample_spikes)
        hdlog.info("Hopfield: %d-bit, %d patterns" %
                   (self._sample_spikes.N, len(self._hopfield_patterns)))

        # print "Before dynamics:"
        # print self.sample_spikes.spikes
        # print "Applied dynamics:"
        self._hopfield_spikes = self._hopfield_patterns.apply_dynamics(
            spikes=self._sample_spikes, reshape=True)
Example #7
    def find_subsequences(self, thresholds, sequence=None):
        """
        Enumerates all subsequences of lengths 1 up to `len(thresholds)`
        in `sequence` (if `sequence` is `None`, the possibly
        filtered sequence from the stored counter object is taken).
        Subsequences of length i are only considered if they
        appear at least `thresholds[i - 1]` times in the sequence.

        Parameters
        ----------
        thresholds : list of int
            List of threshold values, one per subsequence length
        sequence : list or numpy array, optional
            Sequence to consider, if `None` defaults to stored
            sequence (default `None`)

        Returns
        -------
        sequences : list of dicts
            List of dictionaries containing all found sequences
            as keys and counts as values. Keys are memory labels separated
            by ','.
        """
        if sequence is None:
            sequence = self.sequence

        import collections
        counts = {
            str(item[0]): item[1]
            for item in collections.Counter(sequence).items()
        }

        all_counts = []

        maxlen = len(thresholds)
        for l in range(2, maxlen + 2):
            for k in counts.keys():
                if counts[k] < thresholds[l - 2]:
                    del counts[k]

            all_counts.append(counts)

            if l == maxlen + 1:
                break

            hdlog.info('processing subsequences of length %d' % l)
            counts_new = {}
            for s in SequenceAnalyzer.subseqs(sequence, l):
                subkey = ','.join([str(x) for x in s[:l - 1]])
                if subkey not in counts:
                    continue
                key = subkey + ',' + str(s[l - 1])
                if key not in counts_new:
                    counts_new[key] = 1
                else:
                    counts_new[key] += 1

            counts = counts_new

        return all_counts
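
The grow-and-prune logic above can be hard to follow from the loop alone; here is a small self-contained sketch (plain Python, independent of hdnet) of the same idea on a toy label sequence, showing what the per-length thresholds mean.

from collections import Counter

labels = [0, 1, 2, 0, 1, 2, 0, 1, 3]
thresholds = [2, 2, 1]  # thresholds[i - 1]: minimum count for length-i subsequences

# length-1 counts first, then grow by one element per round, pruning as in find_subsequences
kept = {str(k): v for k, v in Counter(labels).items()}
results = []
for length in range(2, len(thresholds) + 2):
    kept = {k: v for k, v in kept.items() if v >= thresholds[length - 2]}
    results.append(kept)
    if length == len(thresholds) + 1:
        break
    grown = Counter(
        ','.join(map(str, labels[i:i + length]))
        for i in range(len(labels) - length + 1)
        if ','.join(map(str, labels[i:i + length - 1])) in kept)
    kept = dict(grown)

print(results[1])  # {'0,1': 3, '1,2': 2, '2,0': 2} for this toy sequence
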
Example #8
    def test_saving(self):
        spikes = Spikes(spikes=np.array([[1, 1, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0]]))
        hdlog.info(spikes.spikes)

        spikes.save(os.path.join(self.TMP_PATH, 'spikes'))
        spikes2 = Spikes.load(os.path.join(self.TMP_PATH, 'spikes'))
        hdlog.info(spikes2.spikes)
        self.assertTrue((spikes.spikes == spikes2.spikes).all())
Example #10
    def find_subsequences(self, thresholds, sequence=None):
        """
        Enumerates all subsequences of lengths 1 up to `len(thresholds)`
        in `sequence` (if `sequence` is `None`, the possibly
        filtered sequence from the stored counter object is taken).
        Subsequences of length i are only considered if they
        appear at least `thresholds[i - 1]` times in the sequence.

        Parameters
        ----------
        thresholds : list of int
            List of threshold values, one per subsequence length
        sequence : list or numpy array, optional
            Sequence to consider, if `None` defaults to stored
            sequence (default `None`)

        Returns
        -------
        sequences : list of dicts
            List of dictionaries containing all found sequences
            as keys and counts as values. Keys are memory labels separated
            by ','.
        """
        if sequence is None:
            sequence = self.sequence

        import collections
        counts = {str(item[0]): item[1] for item in collections.Counter(sequence).items()}

        all_counts = []

        maxlen = len(thresholds)
        for l in xrange(2, maxlen + 2):
            for k in counts.keys():
                if counts[k] < thresholds[l - 2]:
                    del counts[k]

            all_counts.append(counts)

            if l == maxlen + 1:
                break

            hdlog.info('processing subsequences of length %d' % l)
            counts_new = {}
            for s in SequenceAnalyzer.subseqs(sequence, l):
                subkey = ','.join([str(x) for x in s[:l - 1]])
                if subkey not in counts:
                    continue
                key = subkey + ',' + str(s[l - 1])
                if key not in counts_new:
                    counts_new[key] = 1
                else:
                    counts_new[key] += 1

            counts = counts_new

        return all_counts
Example #11
    def test_counter(self):
        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/tiny_spikes.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        hdlog.info(spikes._spikes)

        counter = Counter()
        counter.chomp_spikes(spikes)
        hdlog.info(counter._counts)
        self.assertEqual(len(counter), 4)

        counter = Counter()
        counter.chomp_spikes(spikes, window_size=3)
        self.assertEqual(len(counter), 4)

        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        counter = Counter()
        counter.chomp_spikes(spikes, window_size=3)
        self.assertEqual(len(counter), 9)

        counter.save(os.path.join(self.TMP_PATH, 'counter'))
        counter2 = Counter.load(os.path.join(self.TMP_PATH, 'counter'))
        self.assertTrue(isinstance(counter2, Counter))
        self.assertEqual(len(counter), len(counter2))

        spikes_arr1 = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 0]])
        spikes = Spikes(spikes=spikes_arr1)
        counter1 = Counter()
        counter1.chomp_spikes(spikes)
        counter2 = Counter()
        counter2.chomp_spikes(spikes)
        counter2.merge_counts(counter1)
        self.assertEqual(sum(counter2._counts.values()), 6)
        counter3 = counter2 + counter1
        self.assertEqual(counter3, counter2)
        self.assertEqual(sum(counter3.counts.values()), 9)

        spikes_arr2 = np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])
        spikes = Spikes(spikes=spikes_arr2)
        counter4 = Counter().chomp_spikes(spikes).merge_counts(counter3)
        self.assertEqual(len(counter4.counts.keys()), 5)
        self.assertEqual(len(counter4.patterns), 5)

        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        counter = Counter(save_sequence=True)
        counter.chomp_spikes(spikes)
        self.assertEqual(counter._sequence, [0, 1, 0, 2, 3, 4, 1, 5, 4, 6, 2, 0, 6, 2, 2])

        np.random.seed(42)
        spikes_arr = (np.random.randn(5, 10000) < .05).astype(np.int)
        spikes = Spikes(spikes=spikes_arr)
        empirical = PatternsRaw()
        empirical.chomp_spikes(spikes)
        empirical_w2 = PatternsRaw()
        empirical_w2.chomp_spikes(spikes, window_size=2)
        self.assertTrue(np.abs(empirical_w2.entropy() - 2 * empirical.entropy()) < .1)
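
The last assertion relies on entropy being (approximately) additive across independent time bins: when consecutive bins are i.i.d., patterns built from a window of two bins carry about twice the entropy of single-bin patterns. A self-contained sketch of that check, using only numpy and collections:

import numpy as np
from collections import Counter

def empirical_entropy(patterns):
    # Shannon entropy (bits) of the empirical distribution of the rows
    counts = np.array(list(Counter(map(tuple, patterns)).values()), dtype=float)
    p = counts / counts.sum()
    return float(-(p * np.log2(p)).sum())

np.random.seed(42)
spikes = (np.random.randn(5, 10000) < .05).astype(int)  # same array as in the test

cols = spikes.T                              # single-bin patterns, one row per bin
h1 = empirical_entropy(cols)
pairs = np.hstack((cols[0::2], cols[1::2]))  # two-bin patterns (non-overlapping windows, for simplicity)
h2 = empirical_entropy(pairs)
print(h1, h2)  # h2 comes out close to 2 * h1 because the bins are independent
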
Example #12
    def __init__(self,
                 stimulus_arr=None,
                 npz_file=None,
                 h5_file=None,
                 preprocess=True):
        """
        Missing documentation
        
        Parameters
        ----------
        stimulus_arr : Type, optional
            Description (default None)
        npz_file : Type, optional
            Description (default None)
        h5_file : Type, optional
            Description (default None)
        preprocess : bool, optional
            Description (default True)
        
        Returns
        -------
        Value : Type
            Description
        """
        object.__init__(self)
        Restoreable.__init__(self)

        # TODO reuse io functionality from data module!

        self.file_name = npz_file or ''
        if npz_file is None and stimulus_arr is None and h5_file is None:
            self._M = 0
            return

        if stimulus_arr is not None:
            self._stimulus_arr = stimulus_arr

        if npz_file is not None:
            if not os.path.isfile(npz_file):
                hdlog.info("File '%s' does not exist!" % npz_file)
                return
            self.file_name = npz_file
            tmp = np.load(npz_file)
            self._stimulus_arr = tmp[tmp.keys()[0]]

        if h5_file is not None:
            import h5py
            f = h5py.File(h5_file)
            self._stimulus_arr = f[f.keys()[0]]

        if preprocess:
            self.preprocess()

        self._M = self._stimulus_arr.shape[0]
        self._X = self._stimulus_arr.shape[1:]
Example #13
    def test_learning(self):
        # OPR learning (Hopfield original rule)
        np.random.seed(42)

        N = 300
        M = int(N / (4 * np.log(N)))  # theoretical max for OPR [McEliece et al, 87]
        t = now()
        OPR = HopfieldNet(N)
        data = (np.random.random((M, N)) < .5).astype('int')
        OPR.learn_all(data)

        recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
        # recall = (data == OPR.hopfield_binary_dynamics(data, model='OPR')).all(1).mean()
        # recall = OPR.exact_recalled(data, model='OPR')
        hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" %
                   (M, N, 100 * recall, now() - t))
        self.assertTrue(recall > .8)

        M = 50
        OPR = HopfieldNet(N)
        data = (np.random.random((M, N)) < .5).astype('int')
        OPR.learn_all(data)
        recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
        # recall = (data == OPR.hopfield_binary_dynamics(data, model='OPR')).all(1).mean()
        # recall = OPR.exact_recalled(data)
        hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" %
                   (M, N, 100 * recall, now() - t))
        self.assertTrue(recall < .5)

        MPF = HopfieldNetMPF(N)
        MPF.learn_all(data)
        recall = MPF.exact_recalled(data)
        hdlog.info("MPF Performance (%d/%d): %1.2f in %1.2f s" %
                   (M, N, 100 * recall, now() - t))
        self.assertEqual(recall, 1)

        # store 90 memories in 64-bit neurons
        N = 64
        M = 90
        t = now()
        MPF = HopfieldNetMPF(N)
        data = (np.random.random((M, N)) < .5).astype('int')
        MPF.learn_all(data)
        recall = MPF.exact_recalled(data)
        hdlog.info("MPF Performance (%d/%d): %1.2f in %1.2f s" %
                   (M, N, 100 * recall, now() - t))
        self.assertEqual(recall, 1)
        OPR = HopfieldNet(N)
        OPR.learn_all(data)
        recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
        # recall = OPR.exact_recalled(data)
        hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" %
                   (M, N, 100 * recall, now() - t))
        self.assertTrue(recall < .01)
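
The M used in the first block follows directly from the capacity bound cited in the comment; as a quick check of the arithmetic:

import numpy as np

N = 300
M_max = N / (4 * np.log(N))  # McEliece et al. (1987) bound cited in the comment above
print(round(M_max, 1))       # ~13.1, so OPR stores M ~ 13 random patterns reliably,
                             # while M = 50 is far beyond capacity and recall collapses
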
Example #14
    def test_learning(self):
        # OPR learning (Hopfield original rule)
        np.random.seed(42)

        N = 300
        M = int(N / (4 * np.log(N)))  # theoretical max for OPR [McEliece et al, 87]
        t = now()
        OPR = HopfieldNet(N)
        data = (np.random.random((M, N)) < .5).astype('int')
        OPR.learn_all(data)

        recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
        # recall = (data == OPR.hopfield_binary_dynamics(data, model='OPR')).all(1).mean()
        # recall = OPR.exact_recalled(data, model='OPR')
        hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
        self.assertTrue(recall > .8)

        M = 50
        OPR = HopfieldNet(N)
        data = (np.random.random((M, N)) < .5).astype('int')
        OPR.learn_all(data)
        recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
        # recall = (data == OPR.hopfield_binary_dynamics(data, model='OPR')).all(1).mean()
        # recall = OPR.exact_recalled(data)
        hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
        self.assertTrue(recall < .5)

        MPF = HopfieldNetMPF(N)
        MPF.learn_all(data)
        recall = MPF.exact_recalled(data)
        hdlog.info("MPF Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
        self.assertEqual(recall, 1)

        # store 90 memories in 64-bit neurons
        N = 64
        M = 90
        t = now()
        MPF = HopfieldNetMPF(N)
        data = (np.random.random((M, N)) < .5).astype('int')
        MPF.learn_all(data)
        recall = MPF.exact_recalled(data)
        hdlog.info("MPF Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
        self.assertEqual(recall, 1)
        OPR = HopfieldNet(N)
        OPR.learn_all(data)
        recall = (data == OPR.hopfield_binary_dynamics(data)).all(1).mean()
        # recall = OPR.exact_recalled(data)
        hdlog.info("OPR Performance (%d/%d): %1.2f in %1.2f s" % (M, N, 100 * recall, now() - t))
        self.assertTrue(recall < .01)
Example #15
    def load_legacy(cls, file_name='counter'):
        base, ext = os.path.splitext(file_name)
        if not ext:
            ext = ".npz"
        file_name = base + ext

        hdlog.info("Loading Counter patterns from legacy file '%s'" % file_name)
        instance = cls()
        contents = np.load(file_name)
        instance._counts = dict(zip(contents['count_keys'], contents['count_values']))
        instance._patterns = contents['fp_list']
        instance._lookup_patterns = dict(zip(contents['lookup_fp_keys'], contents['lookup_fp_values']))
        instance._sequence = contents['sequence']
        contents.close()
        return instance
Example #16
    def load_legacy(cls, file_name='patterns_hopfield'):
        # internal function to load legacy file format
        base, ext = os.path.splitext(file_name)
        if not ext:
            ext = ".npz"
        file_name = base + ext

        hdlog.info("Loading PatternHopfield patterns from legacy file '%s'" % file_name)
        instance = cls()
        contents = np.load(file_name)
        instance._counts = dict(zip(contents['count_keys'], contents['count_values']))
        instance._patterns = contents['fp_list']
        instance._lookup_patterns = dict(zip(contents['lookup_fp_keys'], contents['lookup_fp_values']))
        instance._sequence = contents['sequence']
        instance._mtas = dict(zip(contents['stas_keys'], contents['stas_values']))
        contents.close()
        return instance
Example #17
    def load_legacy(cls, file_name='counter'):
        base, ext = os.path.splitext(file_name)
        if not ext:
            ext = ".npz"
        file_name = base + ext

        hdlog.info("Loading Counter patterns from legacy file '%s'" %
                   file_name)
        instance = cls()
        contents = np.load(file_name)
        instance._counts = dict(
            zip(contents['count_keys'], contents['count_values']))
        instance._patterns = contents['fp_list']
        instance._lookup_patterns = dict(
            zip(contents['lookup_fp_keys'], contents['lookup_fp_values']))
        instance._sequence = contents['sequence']
        contents.close()
        return instance
Example #18
    def distinct_patterns_over_windows(self, window_sizes=None, trials=None, save_couplings=False, remove_zeros=False):
        """
        Returns tuple: counts, entropies [, couplings]
        counts, entropies: arrays of size 2 x T x WSizes
        (0: empirical from model sample, 1: dynamics from learned model on sample)
        
        Parameters
        ----------
        window_sizes : Type, optional
            Description (default None)
        trials : Type, optional
            Description (default None)
        save_couplings : bool, optional
            Description (default False)
        remove_zeros : bool, optional
            Description (default False)
        
        Returns
        -------
        Value : Type
            Description
        """
        if window_sizes is None:
            window_sizes = [1]
        trials = trials or range(self._original_spikes.T)
        counts = np.zeros((2, len(trials), len(window_sizes)))
        #entropies = np.zeros((2, len(trials), len(window_sizes)))

        couplings = {}

        tot_learn_time = 0

        for ws, window_size in enumerate(window_sizes):
            couplings[window_size] = []

            for c, trial in enumerate(trials):
                hdlog.info("Trial %d | ws %d" % (trial, window_size))

                self._window_size = window_size

                t = now()
                self.fit(trials=[trial], remove_zeros=remove_zeros)
                diff = now() - t
                hdlog.info("[%1.3f min]" % (diff / 60.))
                tot_learn_time += diff

                if save_couplings:
                    couplings[window_size].append(self._learner.network.J.copy())

                self.chomp()
                #entropies[0, c, ws] = self._raw_patterns.entropy()
                counts[0, c, ws] = len(self._raw_patterns)
                #entropies[1, c, ws] = self._hopfield_patterns.entropy()
                counts[1, c, ws] = len(self._hopfield_patterns)

        hdlog.info("Total learn time: %1.3f mins" % (tot_learn_time / 60.))
        self._learn_time = tot_learn_time
        if save_couplings:
            return counts, couplings
        return counts
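
A hypothetical usage sketch for the method above; the class name, import paths and constructor arguments below are assumptions (the method appears to belong to hdnet's spike-model wrapper), so treat them as placeholders.

import numpy as np
from hdnet.spikes import Spikes             # assumed import path
from hdnet.spikes_model import SpikeModel   # assumed class and import path

np.random.seed(0)
spikes_arr = (np.random.random((2, 10, 200)) < .1).astype(int)  # 2 trials, 10 cells, 200 bins
model = SpikeModel(spikes=Spikes(spikes=spikes_arr))            # constructor arguments assumed

counts = model.distinct_patterns_over_windows(window_sizes=[1, 2, 3])
# counts[0, t, w]: distinct raw patterns in trial t at window size window_sizes[w]
# counts[1, t, w]: distinct patterns after applying the learned Hopfield dynamics
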
Example #19
    def load_legacy(cls, file_name='patterns_hopfield'):
        # internal function to load legacy file format
        base, ext = os.path.splitext(file_name)
        if not ext:
            ext = ".npz"
        file_name = base + ext

        hdlog.info("Loading PatternHopfield patterns from legacy file '%s'" %
                   file_name)
        instance = cls()
        contents = np.load(file_name)
        instance._counts = dict(
            zip(contents['count_keys'], contents['count_values']))
        instance._patterns = contents['fp_list']
        instance._lookup_patterns = dict(
            zip(contents['lookup_fp_keys'], contents['lookup_fp_values']))
        instance._sequence = contents['sequence']
        instance._mtas = dict(
            zip(contents['stas_keys'], contents['stas_values']))
        contents.close()
        return instance
Example #20
    def test_patterns_raw(self):
        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/tiny_spikes.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        hdlog.info(spikes._spikes)
        patterns = PatternsRaw()
        patterns.chomp_spikes(spikes)
        hdlog.info(patterns._counts)
        self.assertEqual(len(patterns), 4)

        patterns = PatternsRaw()
        patterns.chomp_spikes(spikes, window_size=3)
        self.assertEqual(len(patterns), 4)

        file_contents = np.load(os.path.join(os.path.dirname(__file__), 'test_data/spikes_trials.npz'))
        spikes = Spikes(file_contents[file_contents.keys()[0]])
        patterns = PatternsRaw()
        patterns.chomp_spikes(spikes, window_size=3)
        self.assertEqual(len(patterns), 9)

        patterns.save(os.path.join(self.TMP_PATH, 'raw'))
        patterns2 = PatternsRaw.load(os.path.join(self.TMP_PATH, 'raw'))
        self.assertTrue(isinstance(patterns2, PatternsRaw))
        self.assertEqual(len(patterns), len(patterns2))
Example #21
    def distinct_patterns_over_windows(self, window_sizes=None, trials=None, save_couplings=False, remove_zeros=False):
        """
        Returns tuple: counts, entropies [, couplings]
        counts, entropies: arrays of size 2 x T x WSizes
        (0: empirical from model sample, 1: dynamics from learned model on sample)
        
        Parameters
        ----------
        window_sizes : Type, optional
            Description (default None)
        trials : Type, optional
            Description (default None)
        save_couplings : bool, optional
            Description (default False)
        remove_zeros : bool, optional
            Description (default False)
        
        Returns
        -------
        Value : Type
            Description
        """
        if window_sizes is None:
            window_sizes = [1]
        trials = trials or range(self._original_spikes.T)
        counts = np.zeros((2, len(trials), len(window_sizes)))
        entropies = np.zeros((2, len(trials), len(window_sizes)))
        couplings = {}

        tot_learn_time = 0

        for ws, window_size in enumerate(window_sizes):
            for c, trial in enumerate(trials):
                hdlog.info("Trial %d | ws %d" % (trial, window_size))

                self._window_size = window_size

                t = now()
                self.fit(trials=[trial], remove_zeros=remove_zeros)
                diff = now() - t
                hdlog.info("[%1.3f min]" % (diff / 60.))
                tot_learn_time += diff

                if save_couplings:
                    couplings.setdefault(window_size, []).append(self._learner.network.J.copy())

                self.chomp()
                entropies[0, c, ws] = self._raw_patterns.entropy()
                counts[0, c, ws] = len(self._raw_patterns)
                entropies[1, c, ws] = self._hopfield_patterns.entropy()
                counts[1, c, ws] = len(self._hopfield_patterns)

        hdlog.info("Total learn time: %1.3f mins" % (tot_learn_time / 60.))
        self._learn_time = tot_learn_time
        if save_couplings:
            return counts, entropies, couplings
        return counts, entropies
Example #22
    def read_spikes(path_or_files, rate, first_cluster=2, filter_silent=True, return_status=False):
        """
        Reader for `KlustaKwik <https://github.com/klusta-team/klustakwik>`_ files.
        
        Parameters
        ----------
        path_or_files : string or list of strings
            path of data set or list of \*.res.\* files to load
        rate : float
            sampling rate [in Hz]
        first_cluster : integer, optional
            discard the first n clusters, commonly used for unclassified spikes (default 2)
        filter_silent : boolean, optional
            filter out clusters that have no spikes (default True)
        return_status : boolean, optional
            if True returns a status dictionary along with data as second return value (default False)

        Returns
        -------
        spike_times : list of numpy arrays
            list of spike time arrays, one per cluster. Float values represent spike times
            in seconds (i.e. a value of 1.0 represents a spike at time 1 s)
        """

        if isinstance(path_or_files, (str, unicode)):
            # glob all res files
            hdlog.info('Loading KlustaKwick data from %s' % os.path.abspath(path_or_files))
            import glob
            res_files = glob.glob(os.path.join(path_or_files, '*.res.*'))
        else:
            res_files = path_or_files
            hdlog.info('Loading KlustaKwick data from files %s' % str(path_or_files))

        hdlog.info('Processing %d electrode files' % len(res_files))

        spike_times = []
        num_clusters = 0
        num_spikes = 0
        t_min = np.inf
        t_max = -np.inf
        cells_filtered = 0
        electrodes = []

        for fn_res in res_files:
            hdlog.debug('Processing electrode file "%s"..' % fn_res)
            electrodes.append(int(fn_res[fn_res.rindex('.') + 1:]))

            fn_clu = fn_res.replace('.res.', '.clu.')
            if not os.path.exists(fn_clu):
                raise Exception('Cluster file "%s" not found!' % fn_clu)

            #load time stamps
            times = np.loadtxt(fn_res) * (1. / float(rate))

            #load cluster data
            clusters = np.loadtxt(fn_clu).astype(int)
            n_clusters = clusters[0]
            cluster_seq = clusters[1:]

            if cluster_seq.shape[0] != times.shape[0]:
                raise Exception('Data inconsistent for files %s, %s: lengths differ!' % (fn_res, fn_clu))

            hdlog.debug('%d clusters, %d spikes' % (n_clusters, cluster_seq.shape[0]))

            spike_times_electrode = [times[np.where(cluster_seq == c)[0]]
                                     for c in range(first_cluster, n_clusters)]

            if filter_silent:
                c_orig = len(spike_times_electrode)
                spike_times_electrode = [x for x in spike_times_electrode if len(x) > 0]
                c_filtered = c_orig - len(spike_times_electrode)
                cells_filtered += c_filtered

            spike_times.extend(spike_times_electrode)

            num_clusters += n_clusters - first_cluster
            num_spikes += sum(map(len, spike_times_electrode))
            t_min = min(t_min, min(times))
            t_max = max(t_max, max(times))

        status = {
            'clusters': num_clusters,
            'discarded_clusters': first_cluster * len(res_files),
            'filtered': cells_filtered,
            't_min': t_min,
            't_max': t_max,
            'num_spikes': num_spikes,
            'electrodes': electrodes
        }

        hdlog.info('Processed %d clusters (%d discarded), %d cells (%d silent discarded), %d spikes total, t_min=%f s, t_max=%f s, delta=%f s' %
                   (num_clusters, first_cluster * len(res_files), num_clusters - cells_filtered, cells_filtered,
                    num_spikes, t_min, t_max, t_max - t_min))

        if return_status:
            return spike_times, status
        else:
            return spike_times
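
A hypothetical call of the reader above; the containing class name and import path are assumptions and the data path is a placeholder.

from hdnet.data import KlustaKwickReader  # assumed class name and import path

spike_times, status = KlustaKwickReader.read_spikes(
    '/path/to/klustakwik/dataset',  # placeholder: directory containing *.res.* and *.clu.* files
    rate=20000.,                    # sampling rate of the .res time stamps, in Hz
    return_status=True)
print(status['clusters'], status['num_spikes'], status['t_max'] - status['t_min'])
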
Example #23
    def get_spike_sequence(spike_times, cells=None, t_min=None, t_max=None):
        """
        Extracts the firing sequence from the given spike times, i.e. a binary
        matrix S of dimension N x M where N is the number of neurons and M the
        total number of spikes in the data set. Each column of S contains exactly
        one non-zero entry, the index of the cell that spiked. Absolute
        spike timing information is discarded, spike order is preserved.

        Takes optional arguments cells, t_min and t_max that can be used to restrict
        the cell indices (defaults to all cells) and time range
        (default t_min = minimum of all spike times in spike_times,
        default t_max = maximum of all spike times in spike_times).

        Parameters
        ----------
        spike_times : array_like
            2d array of spike times of cells
        cells : array_like, optional
            indices of cells to process (default None, i.e. all cells)
        t_min : float, optional
            time of leftmost bin (default None)
        t_max : float, optional
            time of rightmost bin (default None)

        Returns
        -------
        sequence : 2d numpy array of int
            Spike sequence matrix S
        """
        t_min_dat = np.inf
        t_max_dat = -np.inf

        spike_times = np.atleast_2d(spike_times)

        if cells is None:
            cells = np.array(range(len(spike_times)))

        spike_times_nonempty = [x for x in spike_times[cells] if len(x) > 0]
        if len(spike_times_nonempty) > 0:
            t_min_dat = min([t_min_dat] + map(min, spike_times_nonempty))
            t_max_dat = max([t_max_dat] + map(max, spike_times_nonempty))

        if t_min is None:
            t_min = t_min_dat

        if t_max is None:
            t_max = t_max_dat

        if t_min == np.inf or t_max == -np.inf:
            hdlog.info('No spikes!')
            return np.zeros((len(spike_times), 1))

        num_cells = len(cells)
        num_spikes = sum(map(len, spike_times_nonempty))
        sequence = np.zeros((num_cells, num_spikes), dtype=int)

        hdlog.info(
            'Extracting sequences for {c} cells between t_min={m} and t_max={M}, {s} spikes'
            .format(c=num_cells, m=t_min, M=t_max, s=num_spikes))

        times = np.array([s for c in spike_times_nonempty for s in c])
        sort_idx = np.argsort(times)
        idxs = np.array(
            [i for i, c in enumerate(spike_times_nonempty) for _ in c])

        for i, c in enumerate(idxs[sort_idx]):
            sequence[c, i] = 1

        return sequence
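
A self-contained illustration (numpy only) of the sequence matrix the docstring describes: three cells, five spikes, one column per spike in temporal order, with a single 1 marking the cell that fired.

import numpy as np

spike_times = [np.array([0.1, 0.4]),    # cell 0
               np.array([0.2]),         # cell 1
               np.array([0.05, 0.3])]   # cell 2

times = np.concatenate(spike_times)
cells = np.concatenate([[c] * len(st) for c, st in enumerate(spike_times)]).astype(int)
order = cells[np.argsort(times)]        # cell index of each spike, in temporal order

sequence = np.zeros((len(spike_times), len(times)), dtype=int)
sequence[order, np.arange(len(times))] = 1
print(sequence)
# [[0 1 0 0 1]
#  [0 0 1 0 0]
#  [1 0 0 1 0]]
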
Example #24
    def bin_spike_times(spike_times, bin_size, cells = None, t_min=None, t_max=None):
        """
        Bins given spike_times into bins of size bin_size. Spike times
        expected in seconds (i.e. 1.0 for a spike at second 1, 0.5 for a
        spike happening at 500ms).

        Takes optional arguments cells, t_min and t_max that can be used to restrict
        the cell indices (defaults to all cells) and time range
        (default t_min = minimum of all spike times in spike_times,
        default t_max = maximum of all spike times in spike_times).

        Parameters
        ----------
        spike_times : 2d numpy array
            2d array of spike times of cells, cells as rows
        bin_size : float
            bin size to be used for binning (1ms = 0.001)
        cells : array_like, optional
            indices of cells to process (default None, i.e. all cells)
        t_min : float, optional
            time of leftmost bin (default None)
        t_max : float, optional
            time of rightmost bin (default None)

        Returns
        -------
        spikes : :class:`.Spikes`
            Spikes class containing binned spikes.
        """
        t_min_dat = np.inf
        t_max_dat = -np.inf

        spike_times = np.atleast_1d(spike_times)

        if cells is None:
            cells = np.array(range(len(spike_times)))

        spike_times_nonempty = [x for x in spike_times[cells] if len(x) > 0]
        if len(spike_times_nonempty) > 0:
            t_min_dat = min([t_min_dat] + map(min, spike_times_nonempty))
            t_max_dat = max([t_max_dat] + map(max, spike_times_nonempty))

        if t_min is None:
            t_min = t_min_dat

        if t_max is None:
            t_max = t_max_dat

        if t_min == np.inf or t_max == -np.inf:
            hdlog.info('No spikes!')
            return np.zeros((len(spike_times), 1))

        bins = np.arange(t_min, t_max + bin_size, bin_size)
        binned = np.zeros((len(spike_times[cells]), len(bins)), dtype=int)

        hdlog.info('Binning {c} cells between t_min={m} and t_max={M}, {bins} bins'.format(
            c=binned.shape[0], m=t_min, M=t_max, bins=len(bins)
        ))

        for pos, st in enumerate(spike_times[cells]):
            if len(st) > 0:
                indices = np.digitize(st, bins) - 1
                binned[pos, indices] = 1

        return Spikes(spikes=binned)
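
A self-contained sketch (numpy only) of the digitize-based binning used above: two cells binned at 10 ms resolution; rows are cells, columns are bins, and a 1 marks a bin containing at least one spike.

import numpy as np

spike_times = [np.array([0.005, 0.012, 0.031]),  # cell 0, spike times in seconds
               np.array([0.021])]                # cell 1
bin_size = 0.01                                  # 10 ms bins

t_min = min(st.min() for st in spike_times)
t_max = max(st.max() for st in spike_times)
bins = np.arange(t_min, t_max + bin_size, bin_size)

binned = np.zeros((len(spike_times), len(bins)), dtype=int)
for row, st in enumerate(spike_times):
    binned[row, np.digitize(st, bins) - 1] = 1
print(binned)
# [[1 0 1 0]
#  [0 1 0 0]]
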
Example #25
    def bin_spike_times(spike_times,
                        bin_size,
                        cells=None,
                        t_min=None,
                        t_max=None):
        """
        Bins given spike_times into bins of size bin_size. Spike times
        expected in seconds (i.e. 1.0 for a spike at second 1, 0.5 for a
        spike happening at 500ms).

        Takes optional arguments cells, t_min and t_max that can be used to restrict
        the cell indices (defaults to all cells) and time range
        (default t_min = minimum of all spike times in spike_times,
        default t_max = maximum of all spike times in spike_times).

        Parameters
        ----------
        spike_times : 2d numpy array
            2d array of spike times of cells, cells as rows
        bin_size : float
            bin size to be used for binning (1ms = 0.001)
        cells : array_like, optional
            indices of cells to process (default None, i.e. all cells)
        t_min : float, optional
            time of leftmost bin (default None)
        t_max : float, optional
            time of rightmost bin (default None)

        Returns
        -------
        spikes : :class:`.Spikes`
            Spikes class containing binned spikes.
        """
        t_min_dat = np.inf
        t_max_dat = -np.inf

        spike_times = np.atleast_1d(spike_times)

        if cells is None:
            cells = np.array(range(len(spike_times)))

        spike_times_nonempty = [x for x in spike_times[cells] if len(x) > 0]
        if len(spike_times_nonempty) > 0:
            t_min_dat = min([t_min_dat] + map(min, spike_times_nonempty))
            t_max_dat = max([t_max_dat] + map(max, spike_times_nonempty))

        if t_min is None:
            t_min = t_min_dat

        if t_max is None:
            t_max = t_max_dat

        if t_min == np.inf or t_max == -np.inf:
            hdlog.info('No spikes!')
            return np.zeros((len(spike_times), 1))

        bins = np.arange(t_min, t_max + bin_size, bin_size)
        binned = np.zeros((len(spike_times[cells]), len(bins)), dtype=int)

        hdlog.info(
            'Binning {c} cells between t_min={m} and t_max={M}, {bins} bins'.
            format(c=binned.shape[0], m=t_min, M=t_max, bins=len(bins)))

        for pos, st in enumerate(spike_times[cells]):
            if len(st) > 0:
                indices = np.digitize(st, bins) - 1
                binned[pos, indices] = 1

        return Spikes(spikes=binned)
Example #26
    def get_spike_sequence(spike_times, cells = None, t_min=None, t_max=None):
        """
        Extracts the firing sequence from the given spike times, i.e. a binary
        matrix S of dimension N x M where N is the number of neurons and M the
        total number of spikes in the data set. Each column of S contains exactly
        one non-zero entry, the index of the cell that spiked. Absolute
        spike timing information is discarded, spike order is preserved.

        Takes optional arguments cells, t_min and t_max that can be used to restrict
        the cell indices (defaults to all cells) and time range
        (default t_min = minimum of all spike times in spike_times,
        default t_max = maximum of all spike times in spike_times).

        Parameters
        ----------
        spike_times : array_like
            2d array of spike times of cells
        cells : array_like, optional
            indices of cells to process (default None, i.e. all cells)
        t_min : float, optional
            time of leftmost bin (default None)
        t_max : float, optional
            time of rightmost bin (default None)

        Returns
        -------
        sequence : 2d numpy array of int
            Spike sequence matrix S
        """
        t_min_dat = np.inf
        t_max_dat = -np.inf

        spike_times = np.atleast_2d(spike_times)

        if cells is None:
            cells = np.array(range(len(spike_times)))

        spike_times_nonempty = [x for x in spike_times[cells] if len(x) > 0]
        if len(spike_times_nonempty) > 0:
            t_min_dat = min([t_min_dat] + map(min, spike_times_nonempty))
            t_max_dat = max([t_max_dat] + map(max, spike_times_nonempty))

        if t_min is None:
            t_min = t_min_dat

        if t_max is None:
            t_max = t_max_dat

        if t_min == np.inf or t_max == -np.inf:
            hdlog.info('No spikes!')
            return np.zeros((len(spike_times), 1))

        num_cells = len(cells)
        num_spikes = sum(map(len, spike_times_nonempty))
        sequence = np.zeros((num_cells, num_spikes), dtype=int)

        hdlog.info('Extracting sequences for {c} cells between t_min={m} and t_max={M}, {s} spikes'.format(
            c=num_cells, m=t_min, M=t_max, s=num_spikes))

        times = np.array([s for c in spike_times_nonempty for s in c])
        sort_idx = np.argsort(times)
        idxs = np.array([i for i, c in enumerate(spike_times_nonempty) for _ in c])

        for i, c in enumerate(idxs[sort_idx]):
            sequence[c, i] = 1

        return sequence
Example #27
    def read_spikes(path_or_files,
                    rate,
                    first_cluster=2,
                    filter_silent=True,
                    return_status=False):
        """
        Reader for `KlustaKwik <https://github.com/klusta-team/klustakwik>`_ files.
        
        Parameters
        ----------
        path_or_files : string or list of strings
            path of data set or list of \*.res.\* files to load
        rate : float
            sampling rate [in Hz]
        first_cluster : integer, optional
            discard the first n clusters, commonly used for unclassified spikes (default 2)
        filter_silent : boolean, optional
            filter out clusters that have no spikes (default True)
        return_status : boolean, optional
            if True returns a status dictionary along with data as second return value (default False)

        Returns
        -------
        spike_times : list of numpy arrays
            list of spike time arrays, one per cluster. Float values represent spike times
            in seconds (i.e. a value of 1.0 represents a spike at time 1 s)
        """

        if isinstance(path_or_files, (str, unicode)):
            # glob all res files
            hdlog.info('Loading KlustaKwick data from %s' %
                       os.path.abspath(path_or_files))
            import glob
            res_files = glob.glob(os.path.join(path_or_files, '*.res.*'))
        else:
            res_files = path_or_files
            hdlog.info('Loading KlustaKwick data from files %s' %
                       str(path_or_files))

        hdlog.info('Processing %d electrode files' % len(res_files))

        spike_times = []
        num_clusters = 0
        num_spikes = 0
        t_min = np.inf
        t_max = -np.inf
        cells_filtered = 0
        electrodes = []

        for fn_res in res_files:
            hdlog.debug('Processing electrode file "%s"..' % fn_res)
            electrodes.append(int(fn_res[fn_res.rindex('.') + 1:]))

            fn_clu = fn_res.replace('.res.', '.clu.')
            if not os.path.exists(fn_clu):
                raise Exception('Cluster file "%s" not found!' % fn_clu)

            #load time stamps
            times = np.loadtxt(fn_res) * (1. / float(rate))

            #load cluster data
            clusters = np.loadtxt(fn_clu).astype(int)
            n_clusters = clusters[0]
            cluster_seq = clusters[1:]

            if cluster_seq.shape[0] != times.shape[0]:
                raise Exception(
                    'Data inconsistent for files %s, %s: lengths differ!' %
                    (fn_res, fn_clu))

            hdlog.debug('%d clusters, %d spikes' %
                        (n_clusters, cluster_seq.shape[0]))

            spike_times_electrode = [
                times[np.where(cluster_seq == c)[0]]
                for c in xrange(first_cluster, n_clusters)
            ]

            if filter_silent:
                c_orig = len(spike_times_electrode)
                spike_times_electrode = [
                    x for x in spike_times_electrode if len(x) > 0
                ]
                c_filtered = c_orig - len(spike_times_electrode)
                cells_filtered += c_filtered

            spike_times.extend(spike_times_electrode)

            num_clusters += n_clusters - first_cluster
            num_spikes += sum(map(len, spike_times_electrode))
            t_min = min(t_min, min(times))
            t_max = max(t_max, max(times))

        status = {
            'clusters': num_clusters,
            'discarded_clusters': first_cluster * len(res_files),
            'filtered': cells_filtered,
            't_min': t_min,
            't_max': t_max,
            'num_spikes': num_spikes,
            'electrodes': electrodes
        }

        hdlog.info(
            'Processed %d clusters (%d discarded), %d cells (%d silent discarded), %d spikes total, t_min=%f s, t_max=%f s, delta=%f s'
            % (num_clusters, first_cluster * len(res_files),
               num_clusters - cells_filtered, cells_filtered, num_spikes,
               t_min, t_max, t_max - t_min))

        if return_status:
            return spike_times, status
        else:
            return spike_times