Ejemplo n.º 1
0
 def resample_with_censoring(self,data=[],censored_data=[]):
     '''
     Resample from fully observed data together with censored observations.

     Each entry x of censored_data is a censored observation: the true value
     could have been anything >= x. The censored entries are first passed
     through self._uncensor_data to get filled-in values, those are pooled
     with data via combinedata, and the result of self.resample on the pooled
     data is returned.
     '''
     imputed = self._uncensor_data(censored_data)
     pooled = combinedata((data,imputed))
     return self.resample(data=pooled)
Ejemplo n.º 2
0
 def resample_with_censoring(self, data=[], censored_data=[]):
     '''
     Resample from fully observed data together with censored observations.

     Each entry x of censored_data is a censored observation: the true value
     could have been anything >= x. The censored entries are first passed
     through self._uncensor_data to get filled-in values, those are pooled
     with data via combinedata, and the result of self.resample on the pooled
     data is returned.
     '''
     imputed = self._uncensor_data(censored_data)
     pooled = combinedata((data, imputed))
     return self.resample(data=pooled)
Ejemplo n.º 3
0
    def _generate(self,T):
        '''
        Generate a state sequence segment by segment and store it in
        self.stateseq, then set self.T to its length.

        A first state and its duration are drawn, then further (state,
        duration) segments are appended until at least T time steps are
        covered. Each segment is appended whole, so the final sequence can be
        longer than the requested T; self.T records the actual length.

        Side effects: overwrites self.stateseq and self.T, and repeatedly
        calls self.dur.resample, so self.dur is left conditioned on the
        durations of the last state drawn.
        '''
        alpha = self.alpha_0
        betavec = self.beta.betavec
        model = self.model
        # discard any previously stored sequence before generating
        self.stateseq = np.array([])

        # candidate first states: labels occupied in the model, plus a trailing
        # None placeholder that stands for "create a new label"
        ks = list(model._occupied()) + [None]
        # NOTE(review): the argument here is np.arange(len(ks)), i.e. 0,1,2,...;
        # if sample_discrete treats its argument as unnormalized weights (as
        # the `scores` call below suggests), index 0 gets zero weight and, when
        # the model has no occupied labels, all weights are zero -- confirm
        # this is intended
        firststateidx = sample_discrete(np.arange(len(ks)))
        if firststateidx == len(ks)-1:
            # last index is the placeholder: allocate a fresh label
            firststate = self._new_label(ks)
        else:
            firststate = ks[firststateidx]

        # resample the duration distribution on the pooled durations recorded
        # for this label (from the model and from this object), then draw one
        self.dur.resample(combinedata((model._durs_withlabel(firststate),self._durs_withlabel(firststate))))
        firststate_dur = self.dur.rvs()

        self.stateseq = np.ones(firststate_dur,dtype=int)*firststate
        t = firststate_dur

        # run a family-CRF (CRF with durations) forwards
        while t < T:
            # labels in use either in the model or in the sequence built so far
            ks = list(model._occupied() | self._occupied())
            # beta mass not assigned to any label in ks; used for the new-label score
            betarest = 1-sum(betavec[k] for k in ks)
            # transition counts from the current (most recent) state to each
            # candidate, summed over the model and this sequence
            fromto_counts = np.array([model._counts_fromto(self.stateseq[t-1],k)
                                            + self._counts_fromto(self.stateseq[t-1],k)
                                            for k in ks])
            # one score per existing label (zero for a self-transition), followed
            # by a final entry that scores moving to a brand-new label
            scores = np.array([(alpha*betavec[k] + ft if k != self.stateseq[t-1] else 0)
                    for k,ft in zip(ks,fromto_counts)]
                    + [alpha*(1-betavec[self.stateseq[t-1]])*betarest])
            nextstateidx = sample_discrete(scores)
            if nextstateidx == scores.shape[0]-1:
                # final score entry was chosen: allocate a fresh label
                nextstate = self._new_label(ks)
            else:
                nextstate = ks[nextstateidx]

            # now get the duration of nextstate!
            self.dur.resample(combinedata((model._durs_withlabel(nextstate),self._durs_withlabel(nextstate))))
            nextstate_dur = self.dur.rvs()

            # append the whole segment; this may carry t past T
            self.stateseq = np.concatenate((self.stateseq,np.ones(nextstate_dur,dtype=int)*nextstate))

            t += nextstate_dur

        # record the actual generated length, which may exceed the requested T
        self.T = len(self.stateseq)
Ejemplo n.º 4
0
    def resample_with_censoring_and_truncation(self,data=[],censored_data=[],left_truncation_level=None):
        '''
        Resample from observed data, censored data, and (optionally) imputed
        observations below a left-truncation level.

        Steps, in order:
          1. censored_data is filled in through self._uncensor_data.
          2. If left_truncation_level is given and greater than 1, the pmf
             mass on 1 .. left_truncation_level-1 is summed, a count is drawn
             as np.random.geometric(1 - mass) - 1, and when that count is
             positive that many values are drawn with
             self.rvs_given_less_than(left_truncation_level, count).
             Otherwise no extra observations are added.
          3. self.resample is called on the combination of data, the
             filled-in values, and the extra observations.

        Nothing is returned; the result of self.resample is discarded.
        '''
        uncensored = self._uncensor_data(censored_data)

        # default: nothing imputed below the truncation level
        below_cutoff = []
        if left_truncation_level is not None and left_truncation_level > 1:
            mass_below = self.pmf(np.arange(1,left_truncation_level)).sum()
            n_unseen = np.random.geometric(1-mass_below)-1
            if n_unseen > 0:
                below_cutoff = self.rvs_given_less_than(left_truncation_level,n_unseen)

        pooled = combinedata((data,uncensored,below_cutoff))
        self.resample(data=pooled)
Ejemplo n.º 5
0
    def resample_with_censoring_and_truncation(self,
                                               data=[],
                                               censored_data=[],
                                               left_truncation_level=None):
        '''
        Resample from observed data, censored data, and (optionally) imputed
        observations below a left-truncation level.

        Steps, in order:
          1. censored_data is filled in through self._uncensor_data.
          2. If left_truncation_level is given and greater than 1, the pmf
             mass on 1 .. left_truncation_level-1 is summed, a count is drawn
             as np.random.geometric(1 - mass) - 1, and when that count is
             positive that many values are drawn with
             self.rvs_given_less_than(left_truncation_level, count).
             Otherwise no extra observations are added.
          3. self.resample is called on the combination of data, the
             filled-in values, and the extra observations.

        Nothing is returned; the result of self.resample is discarded.
        '''
        uncensored = self._uncensor_data(censored_data)

        # default: nothing imputed below the truncation level
        below_cutoff = []
        if left_truncation_level is not None and left_truncation_level > 1:
            mass_below = self.pmf(np.arange(1, left_truncation_level)).sum()
            n_unseen = np.random.geometric(1 - mass_below) - 1
            if n_unseen > 0:
                below_cutoff = self.rvs_given_less_than(
                    left_truncation_level, n_unseen)

        pooled = combinedata((data, uncensored, below_cutoff))
        self.resample(data=pooled)