def append_dim(self, X_f, cctype, distargs=None, ct_kernel=0, m=1):
    """
    Add a new data column to X.

    Builds a dim for the new column, registers it on the state, and runs
    a column transition kernel in append mode.

    Inputs:
    -- X_f: a numpy array of data
    -- cctype: type of the data
    Keyword args:
    -- distargs: for multinomial data
    -- ct_kernel: must be 0 or 2. MH kernel cannot be used to append.
       NOTE(review): this value is accepted but not read here; the kernel
       is chosen from whether cctype is an uncollapsed type.
    -- m: for ct_kernel=2. Number of auxiliary parameters (forwarded to
       the kernel)
    """
    # The new column takes the next free column index.
    col = self.n_cols
    n_grid = self.n_grid
    uncollapsed = _is_uncollapsed[cctype]

    if uncollapsed:
        dim = cc_dim_uc(X_f, _cctype_class[cctype], col, n_grid=n_grid,
                        distargs=distargs)
    else:
        dim = cc_dim(X_f, _cctype_class[cctype], col, n_grid=n_grid,
                     distargs=distargs)

    self.n_cols += 1
    self.dims.append(dim)
    # -1 is appended as the view entry for the new column; presumably the
    # kernel call below replaces it with a real view index -- TODO confirm.
    self.Zv = numpy.append(self.Zv, -1)

    # Match the kernel to the kind of dim that was just built. The two
    # branches used to be swapped, sending uncollapsed columns to the
    # collapsed kernel and vice versa.
    if uncollapsed:
        column_transition_kernel_uncollapsed(m=m, append=True)
    else:
        column_transition_kernel_collapsed(m=m, append=True)

    self.__check_partitions()
def gen_dims_from_structure(T, Zv, Zc, cc_types, distargs):
    """
    Build one dim per column from an existing partition structure.

    Inputs:
    -- T: column data, indexed by column
    -- Zv: the view index of each column
    -- Zc: indexed by view; Zc[v] is handed to the dim constructor as Z
    -- cc_types: the data type name of each column
    -- distargs: distargs for each column

    Returns a list with one dim per entry of Zv, in column order.
    """
    dims = []
    for col, view in enumerate(Zv):
        type_name = cc_types[col]
        type_class = _cctype_class[type_name]
        # Uncollapsed types are built with cc_dim_uc, all others with cc_dim.
        if _is_uncollapsed[type_name]:
            build_dim = cc_dim_uc.cc_dim_uc
        else:
            build_dim = cc_dim.cc_dim
        dims.append(
            build_dim(T[col], type_class, col, Z=Zc[view],
                      distargs=distargs[col])
        )
    return dims
def __init__(self, X, cctypes, distargs, n_grid=30, Zv=None, Zrcv=None, hypers=None, seed=None):
    """
    cc_state constructor

    input arguments:
    -- X: a list of numpy data columns.
    -- cctypes: a list of strings where each entry is the data type for
       each column.
    -- distargs: a list of distargs appropriate for each type in cctype.
       For details on distargs see the documentation for each data type.

    optional arguments:
    -- n_grid: number of bins for hyperparameter grids. Default = 30.
    -- Zv: The assignment of columns to views. If not specified, a
       partition is generated randomly.
    -- Zrcv: The assignment of rows to clusters for each view. May only
       be given together with Zv.
    -- hypers: if given, hypers[d] is passed to set_hypers on the dim of
       column d.
    -- seed: seeds both random and numpy.random. If None, neither
       generator is reseeded.

    example:
    >>> import numpy
    >>> n_rows = 100
    >>> X = [numpy.random.normal(size=n_rows), numpy.random.normal(size=n_rows)]
    >>> State = cc_state(X, ['normal', 'normal'], [None, None])
    """
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)

    self.n_rows = len(X[0])
    self.n_cols = len(X)
    self.n_grid = n_grid

    # Build one dim per column; the constructor depends on whether the
    # column's type is uncollapsed.
    self.dims = []
    for col in range(self.n_cols):
        column_data = X[col]
        type_name = cctypes[col]
        make_dim = cc_dim_uc if _is_uncollapsed[type_name] else cc_dim
        self.dims.append(
            make_dim(column_data, _cctype_class[type_name], col,
                     n_grid=n_grid, distargs=distargs[col])
        )

    # Apply caller-supplied hyperparameters, one entry per dim.
    if hypers is not None:
        for d, dim in enumerate(self.dims):
            dim.set_hypers(hypers[d])

    # Initialize CRP alpha by drawing from a grid spanning
    # [1/n_cols, n_cols].
    self.alpha_grid = utils.log_linspace(1.0/self.n_cols, self.n_cols,
                                         self.n_grid)
    self.alpha = random.choice(self.alpha_grid)

    assert len(self.dims) == self.n_cols

    # NOTE(review): the length of Zv is only checked when Zrcv is also
    # supplied.
    if Zrcv is not None:
        assert Zv is not None
        assert len(Zv) == self.n_cols
        assert len(Zrcv) == max(Zv)+1
        assert len(Zrcv[0]) == self.n_rows

    # Column-to-view partition: use the one supplied, else draw from a CRP.
    if Zv is not None:
        Nv = utils.bincount(Zv)
        V = len(Nv)
    else:
        Zv, Nv, V = utils.crp_gen(self.n_cols, self.alpha)

    # Construct the views, each holding the dims assigned to it.
    self.views = []
    for view in range(V):
        dims_view = [self.dims[i] for i in range(self.n_cols)
                     if Zv[i] == view]
        if Zrcv is None:
            new_view = cc_view(dims_view, n_grid=n_grid)
        else:
            new_view = cc_view(dims_view, Z=numpy.array(Zrcv[view]),
                               n_grid=n_grid)
        self.views.append(new_view)

    self.Zv = numpy.array(Zv)
    self.Nv = Nv
    self.V = V