コード例 #1
0
    def __init__(self, X, cctypes, distargs, n_grid=30, Zv=None, Zrcv=None, hypers=None, seed=None):
        """
        cc_state constructor

        Builds one dim per data column, picks the CRP concentration parameter
        from a log-spaced grid, and groups the dims into views according to
        the column partition Zv (generated with utils.crp_gen when Zv is not
        supplied).

        input arguments:
        -- X: a list of numpy data columns. Must hold at least one column; the
        number of rows is taken from the first column.
        -- cctypes: a list of strings where each entry is the data type for
        each column.
        -- distargs: a list of distargs appropriate for each type in cctypes.
        For details on distargs see the documentation for each data type.

        optional arguments:
        -- n_grid: number of bins for hyperparameter grids. Default = 30.
        -- Zv: The assignment of columns to views (one entry per column). If
        not specified, a partition is generated randomly.
        -- Zrcv: The assignment of rows to clusters for each view (one
        sequence of n_rows entries per view). Requires Zv.
        -- hypers: per-column hyperparameters; hypers[d] is passed to
        set_hypers() of the dim built for column d. Default = None (skip).
        -- seed: seed for the `random` and `numpy.random` generators. If None,
        neither generator is reseeded.

        raises:
        -- ValueError: if X is empty, if Zv does not have one entry per
        column, if Zrcv is given without Zv, or if Zrcv does not hold exactly
        max(Zv)+1 row assignments of n_rows entries each.

        example:
        >>> import numpy
        >>> n_rows = 100
        >>> X = [numpy.random.normal(size=n_rows), numpy.random.normal(size=n_rows)]
        >>> State = cc_state(X, ['normal', 'normal'], [None, None])
        """

        if seed is not None:
            random.seed(seed)
            numpy.random.seed(seed)

        self.n_cols = len(X)
        if self.n_cols == 0:
            raise ValueError("X must contain at least one data column.")
        self.n_rows = len(X[0])
        self.n_grid = n_grid

        # Validate user-supplied partitions up front, with real exceptions
        # (asserts vanish under `python -O`), so bad input fails before any
        # dim is built or any random number is consumed.
        if Zv is not None and len(Zv) != self.n_cols:
            raise ValueError(
                "Zv must have one entry per column: expected %d, got %d."
                % (self.n_cols, len(Zv)))

        if Zrcv is not None:
            if Zv is None:
                raise ValueError("Zrcv cannot be specified without Zv.")
            n_views = max(Zv)+1
            if len(Zrcv) != n_views:
                raise ValueError(
                    "Zrcv must have one row assignment per view: expected %d, got %d."
                    % (n_views, len(Zrcv)))
            # check every view, not only the first one
            for view, Z in enumerate(Zrcv):
                if len(Z) != self.n_rows:
                    raise ValueError(
                        "Zrcv[%d] must have one entry per row: expected %d, got %d."
                        % (view, self.n_rows, len(Z)))

        # construct the dims; X, cctypes and distargs are parallel sequences
        # and are indexed (rather than zipped) so that a too-short cctypes or
        # distargs still raises instead of silently dropping columns.
        self.dims = []
        for col in range(self.n_cols):
            cctype = cctypes[col]
            dim_class = cc_dim_uc if _is_uncollapsed[cctype] else cc_dim
            self.dims.append(
                dim_class(X[col], _cctype_class[cctype], col, n_grid=n_grid, distargs=distargs[col]))

        # set the hyperparameters in the dims
        if hypers is not None:
            for d in range(self.n_cols):
                self.dims[d].set_hypers(hypers[d])

        # initialize CRP alpha from a log-spaced grid over [1/n_cols, n_cols]
        self.alpha_grid = utils.log_linspace(1.0/self.n_cols, self.n_cols, self.n_grid)
        self.alpha = random.choice(self.alpha_grid)

        # construct the view partition
        if Zv is None:
            Zv, Nv, V = utils.crp_gen(self.n_cols, self.alpha)
        else:
            Nv = utils.bincount(Zv)
            V = len(Nv)

        # construct views: each view owns the dims whose column is assigned to it
        self.views = []
        for view in range(V):
            dims_view = [self.dims[i] for i in range(self.n_cols) if Zv[i] == view]

            if Zrcv is None:
                self.views.append(cc_view(dims_view, n_grid=n_grid))
            else:
                self.views.append(cc_view(dims_view, Z=numpy.array(Zrcv[view]), n_grid=n_grid))

        self.Zv = numpy.array(Zv)
        self.Nv = Nv
        self.V = V
コード例 #2
0
    def __transition_columns_kernel_uncollapsed(self, col, m=3, append=False):
        """Gibbs with auxiliary parameters for uncollapsed data types

        Resamples the view assignment of one column. Every existing view and,
        unless the column is currently alone in its view, m freshly proposed
        singleton views are scored; one candidate is drawn with
        utils.log_pflip and the state is updated through the private
        view-bookkeeping helpers.

        input arguments:
        -- col: index of the column (dim) to transition. Ignored when append
        is True.

        optional arguments:
        -- m: number of auxiliary (proposed singleton) views. Default = 3.
        Must be >= 1: the code divides self.alpha by m.
        -- append: if True, the column at index n_cols-1 is treated as newly
        appended: the view counts are used as-is (nothing is removed from a
        current view), it is never treated as a singleton, and the result is
        applied with __append_new_dim_to_view. Default = False.

        returns:
        -- None. self.dims[dim.index] is replaced by the selected dim object;
        further state changes happen inside the helper that is called.
        """

        if append:
            col = self.n_cols-1

        # get start view, v_a, and check whether a singleton
        v_a = self.Zv[col]

        if append:
            is_singleton = False
            pv = list(self.Nv)
        else:
            is_singleton = (self.Nv[v_a] == 1)

            pv = list(self.Nv)
            # Get crp probabilities under each view. remove from current view.
            # If v_a is a singleton, do not consider move to new singleton view.
            # (its own view keeps the singleton weight alpha instead.)
            if is_singleton:
                pv[v_a] = self.alpha
            else:
                pv[v_a] -= 1

        # take the log
        pv = numpy.log(numpy.array(pv))

        # ps[i] is the log score of candidate i; dim_holder[i] is the dim
        # object that would be installed if candidate i is drawn.
        ps = []
        # calculate probability under each view's assignment
        dim = self.dims[col]

        dim_holder = []

        for v in range(self.V):
            if v == v_a:
                # current view: score the dim as it is, no copy
                dim_holder.append(dim)
            else:
                # other views: score a deep copy reassigned to that view's Z,
                # leaving the original dim untouched
                dim_holder.append(copy.deepcopy(dim))
                dim_holder[-1].reassign(self.views[v].Z)

            p_v = dim_holder[-1].full_marginal_logp()+pv[v]
            ps.append(p_v)

        # if not a singleton, propose m auxiliary parameters (views)
        if not is_singleton:
            # crp probability of singleton, split m times.
            # NOTE(review): m == 0 makes this a division by zero.
            log_aux = log(self.alpha/float(m))
            proposal_views = []
            for  _ in range(m):
                # propose (from prior) and calculate probability under each view
                dim_holder.append(copy.deepcopy(dim))

                # a new view holding only the copied dim; the copy is then
                # reassigned to that view's row partition
                proposal_view = cc_view([dim_holder[-1]], n_grid=self.n_grid)
                proposal_views.append(proposal_view)
                dim_holder[-1].reassign(proposal_view.Z)

                p_v = dim_holder[-1].full_marginal_logp()+log_aux
                ps.append(p_v)


        # draw a view
        # (presumably an index sampled from the log weights in ps - confirm
        # against utils.log_pflip). Indices < self.V are existing views;
        # index self.V+k is the k-th proposed view.
        v_b = utils.log_pflip(ps)

        newdim = dim_holder[v_b]
        self.dims[dim.index] = newdim

        if append:
            if v_b >= self.V:
                index = v_b-self.V
                assert( index >= 0 and index < m)
                proposal_view = proposal_views[index]
            # NOTE(review): when v_b < self.V, proposal_view is not rebound
            # here, so the value passed is the last view created by the
            # proposal loop above (the loop always runs in append mode since
            # is_singleton is False). Presumably the helper ignores it for an
            # existing view - confirm against __append_new_dim_to_view.
            self.__append_new_dim_to_view(newdim, v_b, proposal_view, is_uncollapsed=True)
            return

        # clean up
        # nothing to do when the column stays in its current view
        if v_b != v_a:
            if is_singleton:
                # no proposals were scored for a singleton, so the target
                # must be an existing view
                assert( v_b < self.V )
                self.__destroy_singleton_view(newdim, v_a, v_b, is_uncollapsed=True)
            elif v_b >= self.V:
                # one of the proposed singleton views was drawn
                index = v_b-self.V
                assert( index >= 0 and index < m)
                proposal_view = proposal_views[index]
                self.__create_singleton_view(newdim, v_a, proposal_view, is_uncollapsed=True)
            else:
                # move between two existing views
                self.__move_dim_to_view(newdim, v_a, v_b, is_uncollapsed=True)