Example #1
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters λ, α and β on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other two held
        at their current values, and utils.log_pflip selects the grid index
        to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose λ, α and β
        -- grids: mapping with keys 'λ', 'α' and 'β', each an indexable grid

        Returns a new dict with keys 'λ', 'α' and 'β'.
        """
        first = clusters[0]
        hypers = {'λ': first.λ, 'α': first.α, 'β': first.β}

        visit_order = [0, 1, 2]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_beta_uc.calc_λ_conditional_logps(
                    clusters, grids['λ'], hypers['α'], hypers['β'])
                hypers['λ'] = grids['λ'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_beta_uc.calc_α_conditional_logps(
                    clusters, grids['α'], hypers['λ'], hypers['β'])
                hypers['α'] = grids['α'][utils.log_pflip(logps)]
            elif which == 2:
                logps = cc_beta_uc.calc_β_conditional_logps(
                    clusters, grids['β'], hypers['λ'], hypers['α'])
                hypers['β'] = grids['β'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalild hyper")

        return hypers
Example #2
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters a, b and k on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other two held
        at their current values, and utils.log_pflip selects the grid index
        to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose a, b and k
        -- grids: mapping with keys 'a', 'b' and 'k', each an indexable grid

        Returns a new dict with keys 'a', 'b' and 'k'.
        """
        first = clusters[0]
        hypers = {'a': first.a, 'b': first.b, 'k': first.k}

        visit_order = [0, 1, 2]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_vonmises.calc_a_conditional_logps(
                    clusters, grids['a'], hypers['b'], hypers['k'])
                hypers['a'] = grids['a'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_vonmises.calc_b_conditional_logps(
                    clusters, grids['b'], hypers['a'], hypers['k'])
                hypers['b'] = grids['b'][utils.log_pflip(logps)]
            elif which == 2:
                logps = cc_vonmises.calc_k_conditional_logps(
                    clusters, grids['k'], hypers['a'], hypers['b'])
                hypers['k'] = grids['k'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
Example #3
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters a and b on their grids.

        Starting values are read from clusters[0]. The two hyperparameters are
        visited once each in shuffled order: the conditional log-probabilities
        over the grid are computed with the other one held at its current
        value, and utils.log_pflip selects the grid index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose a and b
        -- grids: mapping with keys 'a' and 'b', each an indexable grid

        Returns a new dict with keys 'a' and 'b'.
        """
        first = clusters[0]
        hypers = {'a': first.a, 'b': first.b}

        visit_order = [0, 1]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_poisson.calc_a_conditional_logps(
                    clusters, grids['a'], hypers['b'])
                hypers['a'] = grids['a'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_poisson.calc_b_conditional_logps(
                    clusters, grids['b'], hypers['a'])
                hypers['b'] = grids['b'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
Example #4
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters a and b on their grids.

        Starting values are read from clusters[0]. The two hyperparameters are
        visited once each in shuffled order: the conditional log-probabilities
        over the grid are computed with the other one held at its current
        value, and utils.log_pflip selects the grid index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose a and b
        -- grids: mapping with keys 'a' and 'b', each an indexable grid

        Returns a new dict with keys 'a' and 'b'.
        """
        first = clusters[0]
        hypers = {'a': first.a, 'b': first.b}

        visit_order = [0, 1]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_poisson.calc_a_conditional_logps(
                    clusters, grids['a'], hypers['b'])
                hypers['a'] = grids['a'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_poisson.calc_b_conditional_logps(
                    clusters, grids['b'], hypers['a'])
                hypers['b'] = grids['b'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
    def update_hypers(clusters, grids):
        """
        Redraw the single hyperparameter alpha on its grid.

        The conditional log-probabilities over grids['alpha'] are computed by
        cc_multinomial.calc_alpha_conditional_logps, and utils.log_pflip
        selects the grid index to adopt.

        arguments:
        -- clusters: passed through to calc_alpha_conditional_logps
        -- grids: mapping with key 'alpha', an indexable grid

        Returns a new dict with the single key 'alpha'.
        """
        alpha_grid = grids['alpha']
        logps = cc_multinomial.calc_alpha_conditional_logps(clusters, alpha_grid)
        chosen = utils.log_pflip(logps)

        return {'alpha': grids['alpha'][chosen]}
Example #6
0
 def __transition_state_alpha(self):
     """
     Re-select self.alpha from self.alpha_grid.

     The first self.n_grid grid values are each scored with
     utils.unorm_lcrp_post (given self.n_cols, self.V and a function that
     always returns 0); utils.log_pflip then picks the index of the grid
     value stored back into self.alpha.
     """
     n_points = self.n_grid
     scores = numpy.zeros(n_points)
     for idx in range(n_points):
         candidate = self.alpha_grid[idx]
         scores[idx] = utils.unorm_lcrp_post(
             candidate, self.n_cols, self.V, lambda x: 0)

     chosen = utils.log_pflip(scores)
     self.alpha = self.alpha_grid[chosen]
Example #7
0
    def update_hypers(clusters, grids):
        """
        Redraw the single hyperparameter alpha on its grid.

        The conditional log-probabilities over grids['alpha'] are computed by
        cc_multinomial.calc_alpha_conditional_logps, and utils.log_pflip
        selects the grid index to adopt.

        arguments:
        -- clusters: passed through to calc_alpha_conditional_logps
        -- grids: mapping with key 'alpha', an indexable grid

        Returns a new dict with the single key 'alpha'.
        """
        alpha_grid = grids['alpha']
        logps = cc_multinomial.calc_alpha_conditional_logps(clusters, alpha_grid)
        chosen = utils.log_pflip(logps)

        return {'alpha': grids['alpha'][chosen]}
Example #8
0
    def reassign_rows_to_cats(self, which_rows=None):
        """
        Redraw the category assignment of rows, one row at a time.

        optional arguments:
        -- which_rows: a list of rows to reassign. If not specified, every
        row 0..self.N-1 is reassigned, in order.

        For each row the candidate log-scores are a predictive log-probability
        plus a log CRP weight; utils.log_pflip picks the new assignment and
        the matching create/destroy/move bookkeeping method is invoked when
        the assignment changes. Returns None.
        """
        log_alpha = log(self.alpha)

        rows = list(range(self.N)) if which_rows is None else which_rows

        for row in rows:
            # current assignment and whether the row is alone in its category
            z_a = self.Z[row]
            is_singleton = (self.Nk[z_a] == 1)

            # CRP weights with this row taken out of its category; a lone row
            # keeps its own category as the "new category" option, weighted
            # by alpha
            counts = list(self.Nk)
            counts[z_a] = self.alpha if is_singleton else counts[z_a] - 1
            log_crp = numpy.log(numpy.array(counts))

            # log-score of the row under every existing category
            ps = []
            for k in range(self.K):
                if is_singleton and k == z_a:
                    ps.append(self.singleton_predictive_logp(row) + log_crp[k])
                else:
                    ps.append(self.row_predictive_logp(row, k) + log_crp[k])

            # a row that is not alone may also open a fresh category
            if not is_singleton:
                ps.append(self.singleton_predictive_logp(row) + log_alpha)

            # draw the new assignment, z_b
            z_b = utils.log_pflip(ps)
            if z_b == z_a:
                continue

            if is_singleton:
                self.destroy_singleton_cluster(row, z_a, z_b)
            elif z_b == self.K:
                self.create_singleton_cluster(row, z_a)
            else:
                self.move_row_to_cluster(row, z_a, z_b)
Example #9
0
    def reassign_rows_to_cats(self, which_rows=None):
        """
        Redraw the category assignment of rows, one row at a time.

        optional arguments:
        -- which_rows: a list of rows to reassign. If not specified, every
        row 0..self.N-1 is reassigned, in order.

        For each row the candidate log-scores are a predictive log-probability
        plus a log CRP weight; utils.log_pflip picks the new assignment and
        the matching create/destroy/move bookkeeping method is invoked when
        the assignment changes. Returns None.
        """
        log_alpha = log(self.alpha)

        rows = list(range(self.N)) if which_rows is None else which_rows

        for row in rows:
            # current assignment and whether the row is alone in its category
            z_a = self.Z[row]
            is_singleton = (self.Nk[z_a] == 1)

            # CRP weights with this row taken out of its category; a lone row
            # keeps its own category as the "new category" option, weighted
            # by alpha
            counts = list(self.Nk)
            counts[z_a] = self.alpha if is_singleton else counts[z_a] - 1
            log_crp = numpy.log(numpy.array(counts))

            # log-score of the row under every existing category
            ps = []
            for k in range(self.K):
                if is_singleton and k == z_a:
                    ps.append(self.singleton_predictive_logp(row) + log_crp[k])
                else:
                    ps.append(self.row_predictive_logp(row, k) + log_crp[k])

            # a row that is not alone may also open a fresh category
            if not is_singleton:
                ps.append(self.singleton_predictive_logp(row) + log_alpha)

            # draw the new assignment, z_b
            z_b = utils.log_pflip(ps)
            if z_b == z_a:
                continue

            if is_singleton:
                self.destroy_singleton_cluster(row, z_a, z_b)
            elif z_b == self.K:
                self.create_singleton_cluster(row, z_a)
            else:
                self.move_row_to_cluster(row, z_a, z_b)
Example #10
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters m, s, r and nu on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other three
        held at their current values, and utils.log_pflip selects the grid
        index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose m, s, r, nu
        -- grids: mapping with keys 'm', 's', 'r' and 'nu', each an indexable
        grid

        Returns a new dict with keys 'm', 's', 'r' and 'nu'.
        """
        first = clusters[0]
        hypers = {'m': first.m, 's': first.s, 'r': first.r, 'nu': first.nu}

        visit_order = [0, 1, 2, 3]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_normal.calc_m_conditional_logps(
                    clusters, grids['m'],
                    hypers['r'], hypers['s'], hypers['nu'])
                hypers['m'] = grids['m'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_normal.calc_s_conditional_logps(
                    clusters, grids['s'],
                    hypers['m'], hypers['r'], hypers['nu'])
                hypers['s'] = grids['s'][utils.log_pflip(logps)]
            elif which == 2:
                logps = cc_normal.calc_r_conditional_logps(
                    clusters, grids['r'],
                    hypers['m'], hypers['s'], hypers['nu'])
                hypers['r'] = grids['r'][utils.log_pflip(logps)]
            elif which == 3:
                logps = cc_normal.calc_nu_conditional_logps(
                    clusters, grids['nu'],
                    hypers['m'], hypers['r'], hypers['s'])
                hypers['nu'] = grids['nu'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalild hyper")

        return hypers
Example #11
0
 def transition_alpha(self):
     """
     Score the CRP alpha grid and move self.alpha to the selected value.

     Every value in self.alpha_grid is scored with utils.unorm_lcrp_post
     (given self.N, self.K and a function that always returns 0);
     utils.log_pflip then picks the index of the grid value stored back
     into self.alpha.
     """
     grid = self.alpha_grid
     scores = numpy.zeros(len(grid))
     for idx, candidate in enumerate(grid):
         scores[idx] = utils.unorm_lcrp_post(
             candidate, self.N, self.K, lambda x: 0)

     chosen = utils.log_pflip(scores)
     self.alpha = grid[chosen]
Example #12
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters a, b, shape and scale on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other three
        held at their current values, and utils.log_pflip selects the grid
        index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose a, b,
        shape and scale
        -- grids: mapping with keys 'a', 'b', 'shape' and 'scale', each an
        indexable grid

        Returns a new dict with keys 'a', 'b', 'shape' and 'scale'.
        """
        first = clusters[0]
        hypers = {
            'a': first.a,
            'b': first.b,
            'shape': first.shape,
            'scale': first.scale,
        }

        visit_order = [0, 1, 2, 3]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_vonmises_uc.calc_a_conditional_logps(
                    clusters, grids['a'],
                    hypers['b'], hypers['shape'], hypers['scale'])
                hypers['a'] = grids['a'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_vonmises_uc.calc_b_conditional_logps(
                    clusters, grids['b'],
                    hypers['a'], hypers['shape'], hypers['scale'])
                hypers['b'] = grids['b'][utils.log_pflip(logps)]
            elif which == 2:
                # NOTE(review): b is passed before a here, unlike the shape
                # call below; order kept as found -- confirm against the
                # signature of calc_scale_conditional_logps.
                logps = cc_vonmises_uc.calc_scale_conditional_logps(
                    clusters, grids['scale'],
                    hypers['b'], hypers['a'], hypers['shape'])
                hypers['scale'] = grids['scale'][utils.log_pflip(logps)]
            elif which == 3:
                logps = cc_vonmises_uc.calc_shape_conditional_logps(
                    clusters, grids['shape'],
                    hypers['a'], hypers['b'], hypers['scale'])
                hypers['shape'] = grids['shape'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
Example #13
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters a, b, t and m on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other three
        held at their current values, and utils.log_pflip selects the grid
        index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose a, b, t, m
        -- grids: mapping with keys 'a', 'b', 't' and 'm', each an indexable
        grid

        Returns a new dict with keys 'a', 'b', 't' and 'm'.
        """
        first = clusters[0]
        hypers = {'a': first.a, 'b': first.b, 't': first.t, 'm': first.m}

        visit_order = [0, 1, 2, 3]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_lognormal.calc_a_conditional_logps(
                    clusters, grids['a'],
                    hypers['b'], hypers['t'], hypers['m'])
                hypers['a'] = grids['a'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_lognormal.calc_b_conditional_logps(
                    clusters, grids['b'],
                    hypers['a'], hypers['t'], hypers['m'])
                hypers['b'] = grids['b'][utils.log_pflip(logps)]
            elif which == 2:
                logps = cc_lognormal.calc_t_conditional_logps(
                    clusters, grids['t'],
                    hypers['a'], hypers['b'], hypers['m'])
                hypers['t'] = grids['t'][utils.log_pflip(logps)]
            elif which == 3:
                logps = cc_lognormal.calc_m_conditional_logps(
                    clusters, grids['m'],
                    hypers['a'], hypers['b'], hypers['t'])
                hypers['m'] = grids['m'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
Example #14
0
    def transition_alpha(self):
        """
        Score the CRP alpha grid and move self.alpha to the selected value.

        Every value in self.alpha_grid is scored with utils.unorm_lcrp_post
        (given self.N, self.K and a function that always returns 0);
        utils.log_pflip then picks the index of the grid value stored back
        into self.alpha.
        """
        grid = self.alpha_grid
        scores = numpy.zeros(len(grid))
        for idx, candidate in enumerate(grid):
            scores[idx] = utils.unorm_lcrp_post(
                candidate, self.N, self.K, lambda x: 0)

        chosen = utils.log_pflip(scores)
        self.alpha = grid[chosen]
Example #15
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters m, s, r and nu on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other three
        held at their current values, and utils.log_pflip selects the grid
        index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose m, s, r, nu
        -- grids: mapping with keys 'm', 's', 'r' and 'nu', each an indexable
        grid

        Returns a new dict with keys 'm', 's', 'r' and 'nu'.
        """
        first = clusters[0]
        hypers = {'m': first.m, 's': first.s, 'r': first.r, 'nu': first.nu}

        visit_order = [0, 1, 2, 3]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_normal.calc_m_conditional_logps(
                    clusters, grids['m'],
                    hypers['r'], hypers['s'], hypers['nu'])
                hypers['m'] = grids['m'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_normal.calc_s_conditional_logps(
                    clusters, grids['s'],
                    hypers['m'], hypers['r'], hypers['nu'])
                hypers['s'] = grids['s'][utils.log_pflip(logps)]
            elif which == 2:
                logps = cc_normal.calc_r_conditional_logps(
                    clusters, grids['r'],
                    hypers['m'], hypers['s'], hypers['nu'])
                hypers['r'] = grids['r'][utils.log_pflip(logps)]
            elif which == 3:
                logps = cc_normal.calc_nu_conditional_logps(
                    clusters, grids['nu'],
                    hypers['m'], hypers['r'], hypers['s'])
                hypers['nu'] = grids['nu'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalild hyper")

        return hypers
Example #16
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters alpha and beta on their grids.

        Starting values are read from clusters[0]. The two hyperparameters are
        visited once each in shuffled order: the conditional log-probabilities
        over the grid are computed with the other one held at its current
        value, and utils.log_pflip selects the grid index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose alpha and
        beta
        -- grids: mapping with keys 'alpha' and 'beta', each an indexable grid

        Returns a new dict with keys 'alpha' and 'beta'.
        """
        first = clusters[0]
        hypers = {'alpha': first.alpha, 'beta': first.beta}

        visit_order = [0, 1]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_binomial.calc_alpha_conditional_logps(
                    clusters, grids['alpha'], hypers['beta'])
                hypers['alpha'] = grids['alpha'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_binomial.calc_beta_conditional_logps(
                    clusters, grids['beta'], hypers['alpha'])
                hypers['beta'] = grids['beta'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
Example #17
0
    def update_hypers(clusters, grids):
        """
        Redraw the hyperparameters a, b, shape and scale on their grids.

        Starting values are read from clusters[0]. Each hyperparameter is then
        visited exactly once, in shuffled order: its conditional
        log-probabilities over the grid are computed with the other three
        held at their current values, and utils.log_pflip selects the grid
        index to adopt.

        arguments:
        -- clusters: indexable collection; clusters[0] must expose a, b,
        shape and scale
        -- grids: mapping with keys 'a', 'b', 'shape' and 'scale', each an
        indexable grid

        Returns a new dict with keys 'a', 'b', 'shape' and 'scale'.
        """
        first = clusters[0]
        hypers = {
            'a': first.a,
            'b': first.b,
            'shape': first.shape,
            'scale': first.scale,
        }

        visit_order = [0, 1, 2, 3]
        random.shuffle(visit_order)

        for which in visit_order:
            if which == 0:
                logps = cc_vonmises_uc.calc_a_conditional_logps(
                    clusters, grids['a'],
                    hypers['b'], hypers['shape'], hypers['scale'])
                hypers['a'] = grids['a'][utils.log_pflip(logps)]
            elif which == 1:
                logps = cc_vonmises_uc.calc_b_conditional_logps(
                    clusters, grids['b'],
                    hypers['a'], hypers['shape'], hypers['scale'])
                hypers['b'] = grids['b'][utils.log_pflip(logps)]
            elif which == 2:
                # NOTE(review): b is passed before a here, unlike the shape
                # call below; order kept as found -- confirm against the
                # signature of calc_scale_conditional_logps.
                logps = cc_vonmises_uc.calc_scale_conditional_logps(
                    clusters, grids['scale'],
                    hypers['b'], hypers['a'], hypers['shape'])
                hypers['scale'] = grids['scale'][utils.log_pflip(logps)]
            elif which == 3:
                logps = cc_vonmises_uc.calc_shape_conditional_logps(
                    clusters, grids['shape'],
                    hypers['a'], hypers['b'], hypers['scale'])
                hypers['shape'] = grids['shape'][utils.log_pflip(logps)]
            else:
                raise ValueError("invalid hyper")

        return hypers
Example #18
0
def mutual_information(state, col_1, col_2, N=1000):
    """
    Monte Carlo estimate of the mutual information between two columns.

    If state.Zv assigns the two columns to different views, a message is
    printed and 0.0 is returned. Otherwise N (x, y) pairs are generated by
    picking a cluster index with utils.log_pflip over the cluster log CRP
    weights and calling predictive_draw on that cluster of each column. For
    every pair, log p(x, y) - log p(x) - log p(y) is accumulated, each term
    being a logsumexp over clusters of predictive_logp plus the log CRP
    weight. The average over N is returned; a negative average is reported
    with a print and clamped to 0.0.
    """
    view = state.Zv[col_1]
    other_view = state.Zv[col_2]
    if view != other_view:
        print("mutual_information: not in same view: MI = 0.0")
        return 0.0

    log_crp = su.get_cluster_crps(state, view)
    n_clusters = len(log_crp)

    clusters_x = su.create_cluster_set(state, col_1)
    clusters_y = su.create_cluster_set(state, col_2)

    # per-cluster scratch buffers, overwritten for every sampled pair
    logp_x = numpy.zeros(n_clusters)
    logp_y = numpy.zeros(n_clusters)
    logp_xy = numpy.zeros(n_clusters)

    total = 0
    for _ in range(N):
        source = utils.log_pflip(log_crp)
        x = clusters_x[source].predictive_draw()
        y = clusters_y[source].predictive_draw()

        for k in range(n_clusters):
            logp_x[k] = clusters_x[k].predictive_logp(x)
            logp_y[k] = clusters_y[k].predictive_logp(y)
            # the joint term must be formed before the weight is folded into
            # the two marginal terms
            logp_xy[k] = logp_x[k] + logp_y[k] + log_crp[k]
            logp_x[k] += log_crp[k]
            logp_y[k] += log_crp[k]

        marginal_x = logsumexp(logp_x)
        marginal_y = logsumexp(logp_y)
        joint = logsumexp(logp_xy)

        total += (joint - marginal_x - marginal_y)

    total /= float(N)

    if total < 0.0:
        print("mutual_information: MI < 0 (%f)" % total)
        total = 0.0

    return total
Example #19
0
def mutual_information(state, col_1, col_2, N=1000):
    """
    Monte Carlo estimate of the mutual information between two columns.

    If state.Zv assigns the two columns to different views, a message is
    printed and 0.0 is returned. Otherwise N (x, y) pairs are generated by
    picking a cluster index with utils.log_pflip over the cluster log CRP
    weights and calling predictive_draw on that cluster of each column. For
    every pair, log p(x, y) - log p(x) - log p(y) is accumulated, each term
    being a logsumexp over clusters of predictive_logp plus the log CRP
    weight. The average over N is returned; a negative average is reported
    with a print and clamped to 0.0.
    """
    view = state.Zv[col_1]
    other_view = state.Zv[col_2]
    if view != other_view:
        print("mutual_information: not in same view: MI = 0.0")
        return 0.0

    log_crp = su.get_cluster_crps(state, view)
    n_clusters = len(log_crp)

    clusters_x = su.create_cluster_set(state, col_1)
    clusters_y = su.create_cluster_set(state, col_2)

    # per-cluster scratch buffers, overwritten for every sampled pair
    logp_x = numpy.zeros(n_clusters)
    logp_y = numpy.zeros(n_clusters)
    logp_xy = numpy.zeros(n_clusters)

    total = 0
    for _ in range(N):
        source = utils.log_pflip(log_crp)
        x = clusters_x[source].predictive_draw()
        y = clusters_y[source].predictive_draw()

        for k in range(n_clusters):
            logp_x[k] = clusters_x[k].predictive_logp(x)
            logp_y[k] = clusters_y[k].predictive_logp(y)
            # the joint term must be formed before the weight is folded into
            # the two marginal terms
            logp_xy[k] = logp_x[k] + logp_y[k] + log_crp[k]
            logp_x[k] += log_crp[k]
            logp_y[k] += log_crp[k]

        marginal_x = logsumexp(logp_x)
        marginal_y = logsumexp(logp_y)
        joint = logsumexp(logp_xy)

        total += (joint - marginal_x - marginal_y)

    total /= float(N)

    if total < 0.0:
        print("mutual_information: MI < 0 (%f)" % total)
        total = 0.0

    return total
Example #20
0
def test_predictive_draw(state, N=None):
    """
    Visual check: scatter predictive draws against the actual data.

    Requires a state with exactly two columns that share a view; otherwise a
    message is printed and the function returns without plotting.

    arguments:
    -- state: object exposing n_cols, n_rows, Zv and dims (dims[i].X holds
    the data plotted as 'actual')
    -- N: number of (x, y) pairs to draw. Defaults to state.n_rows.

    Each pair is produced by picking a cluster index with utils.log_pflip
    over the cluster log CRP weights and calling predictive_draw on that
    cluster of each column. Draws are plotted in red, the data in blue, and
    a legend identifies the two series. Returns None.
    """
    import pylab
    if state.n_cols != 2:
        print("state must have exactly 2 columns")
        return

    if N is None:
        N = state.n_rows

    view_1 = state.Zv[0]
    view_2 = state.Zv[1]

    if view_1 != view_2:
        print("Columns not in same view")
        return

    # NOTE(review): the original passed a literal 0 here. The shared view
    # index is used instead, which is how mutual_information calls this
    # helper -- confirm against su.get_cluster_crps.
    log_crp = su.get_cluster_crps(state, view_1)

    clusters_col_1 = su.create_cluster_set(state, 0)
    clusters_col_2 = su.create_cluster_set(state, 1)

    X = numpy.zeros(N)
    Y = numpy.zeros(N)

    for i in range(N):
        c = utils.log_pflip(log_crp)
        X[i] = clusters_col_1[c].predictive_draw()
        Y[i] = clusters_col_2[c].predictive_draw()

    pylab.scatter(X, Y, color='red', label='inferred')
    pylab.scatter(state.dims[0].X,
                  state.dims[1].X,
                  color='blue',
                  label='actual')
    # without this call the label= arguments above are never displayed
    pylab.legend()
    pylab.show()
Example #21
0
def test_predictive_draw(state, N=None):
    """
    Visual check: scatter predictive draws against the actual data.

    Requires a state with exactly two columns that share a view; otherwise a
    message is printed and the function returns without plotting.

    arguments:
    -- state: object exposing n_cols, n_rows, Zv and dims (dims[i].X holds
    the data plotted as 'actual')
    -- N: number of (x, y) pairs to draw. Defaults to state.n_rows.

    Each pair is produced by picking a cluster index with utils.log_pflip
    over the cluster log CRP weights and calling predictive_draw on that
    cluster of each column. Draws are plotted in red, the data in blue, and
    a legend identifies the two series. Returns None.
    """
    import pylab
    if state.n_cols != 2:
        print("state must have exactly 2 columns")
        return

    if N is None:
        N = state.n_rows

    view_1 = state.Zv[0]
    view_2 = state.Zv[1]

    if view_1 != view_2:
        print("Columns not in same view")
        return

    # NOTE(review): the original passed a literal 0 here. The shared view
    # index is used instead, which is how mutual_information calls this
    # helper -- confirm against su.get_cluster_crps.
    log_crp = su.get_cluster_crps(state, view_1)

    clusters_col_1 = su.create_cluster_set(state, 0)
    clusters_col_2 = su.create_cluster_set(state, 1)

    X = numpy.zeros(N)
    Y = numpy.zeros(N)

    for i in range(N):
        c = utils.log_pflip(log_crp)
        X[i] = clusters_col_1[c].predictive_draw()
        Y[i] = clusters_col_2[c].predictive_draw()

    pylab.scatter(X, Y, color='red', label='inferred')
    pylab.scatter(state.dims[0].X, state.dims[1].X, color='blue', label='actual')
    # without this call the label= arguments above are never displayed
    pylab.legend()
    pylab.show()
Example #22
0
    def __transition_columns_kernel_uncollapsed(self, col, m=3, append=False):
        """
        Gibbs with auxiliary parameters for uncollapsed data types.

        Re-draws the view of one column. The column is scored under every
        existing view and, unless it is the only column in its view, under m
        freshly constructed proposal views; utils.log_pflip picks the winner
        and the matching bookkeeping method is called.

        arguments:
        -- col: index of the column to move (ignored when append is True)
        -- m: number of auxiliary proposal views to build
        -- append: if True, operate on the last column (self.n_cols-1), never
        treat it as a singleton, and finish via __append_new_dim_to_view

        Updates self.dims in place and returns None.
        """

        if append:
            col = self.n_cols-1

        # get start view, v_a, and check whether a singleton
        v_a = self.Zv[col]

        if append:
            # in append mode the view counts are used as they are
            is_singleton = False
            pv = list(self.Nv)
        else:
            is_singleton = (self.Nv[v_a] == 1)

            pv = list(self.Nv)
            # Get crp probabilities under each view. remove from current view.
            # If v_a is a singleton, do not consider move to new singleton view.
            if is_singleton:
                pv[v_a] = self.alpha
            else:
                pv[v_a] -= 1

        # take the log
        pv = numpy.log(numpy.array(pv))

        ps = []
        # calculate probability under each view's assignment
        dim = self.dims[col]

        # dim_holder[i] is the candidate dim matching ps[i]: indices below
        # self.V are the existing views, later indices are the proposals
        dim_holder = []

        for v in range(self.V):
            if v == v_a:
                # the current view reuses the live dim object, no copy
                dim_holder.append(dim)
            else:
                # score a deep copy reassigned to view v's partition so the
                # live dim is left untouched
                dim_holder.append(copy.deepcopy(dim))
                dim_holder[-1].reassign(self.views[v].Z)

            p_v = dim_holder[-1].full_marginal_logp()+pv[v]
            ps.append(p_v)

        # if not a singleton, propose m auxiliary parameters (views)
        if not is_singleton:
            # crp probability of singleton, split m times.
            log_aux = log(self.alpha/float(m))
            proposal_views = []
            for  _ in range(m):
                # propose (from prior) and calculate probability under each view
                dim_holder.append(copy.deepcopy(dim))

                proposal_view = cc_view([dim_holder[-1]], n_grid=self.n_grid)
                proposal_views.append(proposal_view)
                dim_holder[-1].reassign(proposal_view.Z)

                p_v = dim_holder[-1].full_marginal_logp()+log_aux
                ps.append(p_v)


        # draw a view
        v_b = utils.log_pflip(ps)

        # install the winning candidate; note the slot is dim.index, not col
        newdim = dim_holder[v_b]
        self.dims[dim.index] = newdim

        if append:
            if v_b >= self.V:
                index = v_b-self.V
                assert( index >= 0 and index < m)
                proposal_view = proposal_views[index]
            # NOTE(review): when v_b < self.V, proposal_view is still the last
            # view built in the proposal loop above -- confirm that
            # __append_new_dim_to_view ignores it in that case.
            self.__append_new_dim_to_view(newdim, v_b, proposal_view, is_uncollapsed=True)
            return

        # clean up
        if v_b != v_a:
            if is_singleton:
                # a singleton is only offered existing views, so v_b < self.V
                assert( v_b < self.V )
                self.__destroy_singleton_view(newdim, v_a, v_b, is_uncollapsed=True)
            elif v_b >= self.V:
                # one of the m proposals won; map v_b back to its view
                index = v_b-self.V
                assert( index >= 0 and index < m)
                proposal_view = proposal_views[index]
                self.__create_singleton_view(newdim, v_a, proposal_view, is_uncollapsed=True)
            else:
                self.__move_dim_to_view(newdim, v_a, v_b, is_uncollapsed=True)