Example 1
 def _get_statistics(self,data):
     """Return (n, xss): the sample size and the sum of squared values.

     ``data`` may be a single ndarray or a list of ndarrays; an empty
     dataset yields (0, 0).
     """
     n = getdatasize(data)
     if n == 0:
         return n, 0
     if isinstance(data, np.ndarray):
         return n, (data**2).sum()
     return n, np.sum([(d**2).sum() for d in data])
Example 2
 def _get_statistics(self, data):
     """Sufficient statistics: (observation count, sum of squares)."""
     n = getdatasize(data)
     if n > 0:
         # single-array vs list-of-arrays input
         xss = (data**2).sum() if isinstance(data, np.ndarray) \
             else np.sum([(d**2).sum() for d in data])
     else:
         xss = 0
     return n, xss
Example 3
 def resample(self,data=[],niter=None):
     """Gibbs-sample the regression matrix A and noise covariance sigma.

     With no data, (A, sigma) are drawn from the prior; otherwise run
     ``niter`` sweeps of the two conditional updates given the
     sufficient statistics of ``data``.

     data : ndarray or list of ndarrays (default: empty)
     niter : number of Gibbs sweeps; ``None`` means use ``self.niter``
     """
     # BUG FIX: `niter if niter else self.niter` silently discarded an
     # explicit niter=0; only fall back when niter was not given.
     niter = niter if niter is not None else self.niter
     if getdatasize(data) == 0:
         # no observations: sample from the prior
         self.A = sample_gaussian(J=self.J_0,h=self.h_0.ravel())\
             .reshape(self.h_0.shape)
         self.sigma = sample_invwishart(self.S_0,self.nu_0)
     else:
         yyT, yxT, xxT, n = self._get_statistics(data)
         for itr in range(niter):
             self._resample_A(xxT, yxT, self.sigma)
             self._resample_sigma(xxT, yxT, yyT, n, self.A)
Example 4
    def max_likelihood(self,data,weights=None):
        """EM-fit this model to ``data``; weighted fitting is unsupported."""
        if weights is not None:
            raise NotImplementedError
        assert isinstance(data,(list,np.ndarray))
        if isinstance(data,list):
            data = np.concatenate(data)

        if getdatasize(data) > 0:
            self.add_data(data)
            self.EM_fit()
            # drop the labels object add_data created
            self.labels_list = []
Example 5
 def resample(self, data=[], niter=None):
     """Gibbs-sample (A, sigma): from the prior when ``data`` is empty,
     otherwise via ``niter`` sweeps of the conditional updates.

     data : ndarray or list of ndarrays (default: empty)
     niter : number of Gibbs sweeps; ``None`` means use ``self.niter``
     """
     # BUG FIX: the truthiness test `niter if niter else ...` ignored an
     # explicit niter=0; compare against None instead.
     niter = niter if niter is not None else self.niter
     if getdatasize(data) == 0:
         # no observations: sample from the prior
         self.A = sample_gaussian(J=self.J_0,h=self.h_0.ravel())\
             .reshape(self.h_0.shape)
         self.sigma = sample_invwishart(self.S_0, self.nu_0)
     else:
         yyT, yxT, xxT, n = self._get_statistics(data)
         for itr in range(niter):
             self._resample_A(xxT, yxT, self.sigma)
             self._resample_sigma(xxT, yxT, yyT, n, self.A)
Example 6
    def max_likelihood(self, data, weights=None):
        """Fit by EM on the concatenated data; ``weights`` must be None."""
        if weights is not None:
            raise NotImplementedError
        assert isinstance(data, (list, np.ndarray))
        if isinstance(data, list):
            data = np.concatenate(data)

        if getdatasize(data) > 0:
            self.add_data(data)
            self.EM_fit()
            self.labels_list = []
Example 7
 def _get_statistics(self,data):
     """Sufficient statistics for the (r, p) posterior.

     Returns (n, data total, per-r log-normalizers, feasibility mask);
     everything but n is None when the dataset is empty.
     """
     n = getdatasize(data)
     if n == 0:
         return n, None, None, None
     data = flattendata(data)
     # only r values no larger than the smallest observation are feasible
     feasible = self.r_support <= data.min()
     assert np.any(feasible)
     r_support = self.r_support[feasible]
     normalizers = (special.gammaln(data[:,na])
             - special.gammaln(data[:,na]-r_support+1)
             - special.gammaln(r_support)).sum(0)
     return n, data.sum(), normalizers, feasible
Example 8
 def _get_statistics(self,data):
     """Compute (n, sum, log-normalizers, feasible-r mask) for ``data``."""
     n = getdatasize(data)
     if n > 0:
         flat = flattendata(data)
         # r is only feasible when it does not exceed the data minimum
         mask = self.r_support <= flat.min()
         assert np.any(mask)
         supp = self.r_support[mask]
         normalizers = (special.gammaln(flat[:,na]) - special.gammaln(flat[:,na]-supp+1)
                 - special.gammaln(supp)).sum(0)
         return n, flat.sum(), normalizers, mask
     else:
         return n, None, None, None
Example 9
 def resample(self,data=[],niter=20):
     """Gibbs-sample (r, p); draw from the prior when there is no data."""
     if getdatasize(data) == 0:
         # no observations: prior draws
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
         return self
     # NOTE(review): `atleast_2d` is unqualified — presumably a project
     # helper rather than np.atleast_2d; confirm it is in scope.
     data = atleast_2d(flattendata(data))
     N = len(data)
     for _ in range(niter):
         # resample r given the CRP table counts
         msum = sample_crp_tablecounts(self.r,data).sum()
         self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p)))
         # resample p given r
         self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
     return self
Example 10
 def resample(self,data=[],niter=20):
     """Gibbs-sample (r, p) for ``niter`` sweeps given ``data``.

     With no data, (p, r) are drawn from their priors. Returns self.
     """
     if getdatasize(data) == 0:
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
     else:
         data = np.atleast_2d(flattendata(data))
         # BUG FIX: N was used below but never defined (NameError); it is
         # the number of observations, i.e. the flattened data length.
         N = data.shape[1]
         ones = np.ones(data.shape[1],dtype=float)
         for itr in range(niter):
             ### resample r
             msum = sample_crp_tablecounts(float(self.r),data,ones).sum()
             self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p)))
             ### resample p
             self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
     return self
Example 11
    def resample(self,data):
        """Resample model parameters conditioned on ``data``.

        Unlike a model, this does not keep a reference to the data.
        """
        assert isinstance(data,(list,np.ndarray))

        if getdatasize(data) == 0:
            self.resample_model()
            return

        if not isinstance(data,np.ndarray):
            data = np.concatenate(data)
        self.add_data(data,initialize_from_prior=False)
        for itr in range(self.niter):
            self.resample_model()
        # drop the temporary labels appended by add_data
        self.labels_list.pop()
Example 12
 def resample_python(self,data=[],niter=20):
     """Pure-python Gibbs sampler for (r, p); returns self."""
     if getdatasize(data) == 0:
         # empty dataset: sample from the prior
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
     else:
         data = flattendata(data)
         N = len(data)
         for _ in range(niter):
             # resample r via the auxiliary table-count sum
             # (same per-observation RNG call order as before)
             msum = sum((np.random.rand(c) < self.r/(np.arange(c)+self.r)).sum()
                     for c in data)
             self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p)))
             # resample p
             self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
     return self
Example 13
 def resample_python(self,data=[],niter=20):
     """Gibbs sampler for (r, p) implemented with plain python loops."""
     if getdatasize(data) == 0:
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
         return self
     data = flattendata(data)
     N = len(data)
     for _ in range(niter):
         ### resample r
         msum = 0.
         for count in data:
             # each bernoulli success contributes one auxiliary table
             msum += (np.random.rand(count) < self.r/(np.arange(count)+self.r)).sum()
         self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p)))
         ### resample p
         self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
     return self
Example 14
    def resample(self, data):
        """Resample parameters given ``data`` without retaining a reference to it."""
        assert isinstance(data, (list, np.ndarray))

        if getdatasize(data) > 0:
            if not isinstance(data, np.ndarray):
                data = np.concatenate(data)

            self.add_data(data, initialize_from_prior=False)
            for _ in range(self.niter):
                self.resample_model()
            # remove the labels object add_data appended
            self.labels_list.pop()
        else:
            # no data: resample from the prior-conditioned model
            self.resample_model()
Example 15
    def _get_statistics(self,data):
        """Return np.array([n, tot]): count and total of the observations.

        ``data`` may be empty, an ndarray, a list of ndarrays, or a scalar;
        all values are required to be nonnegative.
        """
        if getdatasize(data) == 0:
            n = tot = 0
        elif isinstance(data,np.ndarray):
            assert np.all(data >= 0)
            data = np.atleast_1d(data)
            n = data.shape[0]
            tot = data.sum()
        elif isinstance(data,list):
            assert all(np.all(d >= 0) for d in data)
            n = sum(d.shape[0] for d in data)
            tot = sum(d.sum() for d in data)
        else:
            assert np.isscalar(data)
            n, tot = 1, data
        return np.array([n, tot])
Example 16
 def resample_logseriesaug(self,data=[],niter=20):
     """Gibbs-sample (r, p) via a log-series augmentation; returns self.

     data : ndarray or list of ndarrays of counts (default: empty)
     niter : number of Gibbs sweeps
     """
     # an alternative algorithm, kind of opaque and no advantages...
     if getdatasize(data) == 0:
         # no data: draw (p, r) from their priors
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
     else:
         data = flattendata(data)
         N = data.shape[0]
         logF = self.logF  # precomputed log-factor table — presumably indexed by (count-1, aux value); TODO confirm
         L_i = np.zeros(N)  # auxiliary variable, one per observation (stays 0 for zero counts)
         data_nz = data[data > 0]
         for itr in range(niter):
             # log-weights over the augmentation variable for each count value
             logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF
             L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1
             # conditional gamma update for r given the auxiliary sums
             self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N))
             # conditional beta update for p
             self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
     return self
Example 17
 def resample_logseriesaug(self,data=[],niter=20):
     """Gibbs sampler for (r, p) using a log-series augmentation.

     Draws from the prior when ``data`` is empty; returns self.
     """
     # an alternative algorithm, kind of opaque and no advantages...
     if getdatasize(data) == 0:
         # empty dataset: prior draws
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
     else:
         data = flattendata(data)
         N = data.shape[0]
         logF = self.logF  # precomputed log table — NOTE(review): shape/meaning not visible here; confirm
         L_i = np.zeros(N)  # per-observation auxiliary counts (zero counts contribute 0)
         data_nz = data[data > 0]
         for itr in range(niter):
             # unnormalized log-probabilities of the auxiliary variable
             logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF
             L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1
             # gamma conditional for r given the sampled auxiliaries
             self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N))
             # beta conditional for p given r
             self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
     return self
Example 18
    def _get_statistics(self,data):
        """Sufficient statistics [count, sum] for nonnegative count data."""
        if getdatasize(data) == 0:
            n, tot = 0, 0
        elif isinstance(data,np.ndarray):
            # single array of observations
            assert np.all(data >= 0)
            arr = np.atleast_1d(data)
            n, tot = arr.shape[0], arr.sum()
        elif isinstance(data,list):
            # list of observation arrays
            assert all(np.all(d >= 0) for d in data)
            n = sum(d.shape[0] for d in data)
            tot = sum(d.sum() for d in data)
        else:
            # a bare scalar observation
            assert np.isscalar(data)
            n, tot = 1, data

        return np.array([n, tot])