Example #1
def getRandomCSIMixture_conditionalDists(G, p, KL_lower, KL_upper, M=8, dtypes='discgauss', seed = None, fullstruct=False, disc_sampling_dist=None):

#    if seed:
#        random.seed(seed)
#        mixextend.set_gsl_rng_seed(seed)
#        #print '*** seed=',seed
#
#    else: # XXX debug
#        seed = random.randint(1,9999999)
#        mixextend.set_gsl_rng_seed(seed)
#        random.seed(seed)
#        #print '*** seed=',seed

    if disc_sampling_dist is None:
        discSamp = DirichletPrior(M, [1.0] * M)  # uniform sampling
    else:
        discSamp = disc_sampling_dist

    min_sigma = 0.3     # minimal std for Normal
    max_sigma = 5.0     # maximal std for Normal
    min_mu = -25.0      # minimal mean
    max_mu = 25.0       # maximal mean

    assert dtypes in ['disc','gauss','discgauss']

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [ random.choice( (0, 1) )  for i in range(p) ]
    else:
        raise TypeError

    #print featureTypes


    # generate random CSI structures

    if G < 15:
        P = setPartitions.generate_all_partitions(G) # XXX too slow for large G
    #print P

    C = []

    leaders = []
    groups = []
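
    # For each feature a partition of the G components is drawn; the first
    # member of every partition cell acts as the cell's leader, and the
    # remaining members share the leader's distribution (the CSI structure).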
    for j in range(p):
        c_j = {}

        leaders_j = []
        groups_j = {}


        if fullstruct:
            struct_j = [(i,) for i in range(G)]

        elif G < 15:
            struct_j = random.choice(P)
        else:
            print 'WARNING: improper structure sampling!'
            struct_j = setPartitions.get_random_partition(G)

        #print '\nstruct',j,struct_j

        for i,grp in enumerate(struct_j):

            lg = list(grp)

            #print lg

            lgj = lg.pop(0)

            #print lgj

            leaders_j.append(lgj)
            groups_j[lgj] = lg

            max_tries = 100000
            tries = 0
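
            # Rejection sampling: draw candidate parameters until the
            # symmetric KL divergence to every previously accepted
            # distribution of this feature lies within [KL_lower, KL_upper].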


            if featureTypes[j] == 0:
                acc = 0

                while acc == 0:
                    cand = discSamp.sample()

                    #print 'Cand:', cand

                    acc = 1
                    for d in c_j:
                        KL_dist = sym_kl_dist(c_j[d],cand)

                        #print c_j[d],cand, KL_dist

                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break

                    if tries >= max_tries:
                        raise RuntimeError, 'Failed to find separated parameters!'


                for cind in grp:
                    c_j[cind] = cand


            elif featureTypes[j] == 1:
                acc = 0
                while acc == 0:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = NormalDistribution(mu, sigma )
                    acc = 1

                    for d in c_j:
                        KL_dist = sym_kl_dist(c_j[d],cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break

                    if tries >= max_tries:
                        raise RuntimeError, 'Failed to find separated parameters!'


                #    print '.',
                #print

                for cind in grp:
                    c_j[cind] = cand

            else:
                raise RuntimeError('unknown feature type: %s' % str(featureTypes[j]))

        leaders.append(leaders_j)
        groups.append(groups_j)

        C.append(c_j)

    comps = []
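    # Component i combines, for each feature j, the distribution assigned to
    # it by feature j's partition into a ProductDistribution over p features.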
    for i in range(G):
        comps.append( ProductDistribution( [ C[j][i] for j in range(p) ] ) )

    pi = get_random_pi(G, 0.3 / G)
    #print '** pi =',pi


    # create prior
    piprior = DirichletPrior(G,[2.0]*G)

    cprior = []
    for j in range(p):
        if featureTypes[j] == 0:
            cprior.append( DirichletPrior(M,[1.02]*M))

        elif featureTypes[j] == 1:
            cprior.append( NormalGammaPrior(0,0,0,0))   # dummy parameters, to be set later

        else:
            raise RuntimeError('unknown feature type: %s' % str(featureTypes[j]))

    mprior = MixtureModelPrior(0.1,0.1, piprior, cprior)


    m = BayesMixtureModel(G, pi, comps, mprior, struct=1)
    m.leaders = leaders
    m.groups = groups

    m.identifiable()
    m.updateFreeParams()
    #print m

    return m
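
A minimal usage sketch for the generator above, assuming the function and its PyMix dependencies are importable (the module name in the import is hypothetical):

# hypothetical import; adjust to wherever this function is defined
from random_csi_mixtures import getRandomCSIMixture_conditionalDists

# 3 components, 4 mixed discrete/Gaussian features, discrete alphabet size 8;
# distributions within a feature are kept between 0.1 and 5.0 in symmetric KL
m = getRandomCSIMixture_conditionalDists(3, 4, 0.1, 5.0, M=8,
                                         dtypes='discgauss')
print m.leaders   # one leader list per feature
print m.groups    # leader -> group members, per feature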
Example #2
def updateStructureBayesianFullEnumeration(model,
                                           data,
                                           objFunction='MAP',
                                           silent=1):
    """
    CSI structure learning with full enumeration of the structure space.

    @param model: BayesMixtureModel object
    @param data: DataSet object
    @param objFunction: objective function of the optimization, only 'MAP' so far
    @param silent: silence flag; diagnostic output is suppressed by default (silent=1)
    """

    P = setPartitions.generate_all_partitions(
        model.G, order='reverse')  # XXX too slow for large G

    max_ind = len(P) - 1

    curr_indices = [0] * model.dist_nr
    curr_indices[0] = -1

    lpos = model.dist_nr - 1

    nr = 1
    term = 0
    prev_indices = [-1] * model.dist_nr

    # initial structure is full CSI matrix
    best_structure = [[
        (i, ) for i in range(model.G)
    ]] * model.dist_nr  #  [ tuple(range(model.G)) ] * model.dist_nr

    # building data likelihood factor matrix for the current group structure
    l = np.zeros((model.dist_nr, model.G, data.N), dtype='float64')
    for j in range(model.dist_nr):
        for lead_j in range(model.G):
            l_row = model.components[lead_j][j].pdf(data.getInternalFeature(j))
            l[j, lead_j, :] = l_row

    # g is the matrix of log posterior probabilities of the components given the data
    g = np.sum(l, axis=0)
    for k in range(model.G):
        g[k, :] += np.log(model.pi[k])
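
    # matrix_sum_logs computes log(sum_k exp(g[k, :])) column-wise in a
    # numerically stable way (log-sum-exp); subtracting it normalizes g, so
    # each column of tau holds one sample's posterior component memberships.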

    sum_logs = matrix_sum_logs(g)
    g_norm = g - sum_logs
    tau = np.exp(g_norm)

    if not silent:
        print "\ntau="
        for tt in tau:
            print tt.tolist()
        print

    # computing posterior as model selection criterion
    temp = DiscreteDistribution(model.G, model.pi)
    pi_prior = model.prior.piPrior.pdf(temp)
    log_prior = pi_prior
    log_prior_list = [0.0] * model.dist_nr
    for j in range(model.dist_nr):
        for r in range(model.G):
            log_prior_list[j] += model.prior.compPrior[j].pdf(
                model.components[r][j])

        #    log_prior += sum(log_prior_list)
        #
        #    # prior over number of components
        #    log_prior += model.prior.nrCompPrior * model.G
        #    # prior over number of distinct groups
        #    for j in range(model.dist_nr):
        #        log_prior += model.prior.structPrior * len(model.leaders[j])
        #
        #    # get posterior
        #    lk = np.sum(sum_logs)
        #    best_post = lk + log_prior
        #    if not silent:
        #        print best_structure,':'
        #        print "0: ",  lk ,"+", log_prior,"=", best_post
        #        #print log_prior_list

    best_post = float('-inf')

    # initialize merge histories
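    # L[j] caches a CandidateGroup (distribution, sufficient statistics and
    # likelihood row) for every component subset of feature j, so each
    # candidate merge is estimated only once across all enumerated structures.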
    L = [{} for j in range(model.dist_nr)]
    for j in range(model.dist_nr):

        # extracting current feature from the DataSet
        if isinstance(model.components[0][j], MixtureModel):  # XXX
            data_j = data.singleFeatureSubset(j)
        else:
            data_j = data.getInternalFeature(j)

        for lead in range(
                model.G):  # every component is a leader for initialization

            el_dist = copy.copy(model.components[lead][j])
            tau_pool = copy.copy(tau[lead, :])
            pi_pool = model.pi[lead]

            if objFunction == 'MAP':
                model.prior.compPrior[j].mapMStep(el_dist, tau_pool, data_j,
                                                  pi_pool)
            else:
                # should never get here...
                raise TypeError

            stat = el_dist.sufficientStatistics(tau_pool, data_j)

            M = CandidateGroup(el_dist, np.sum(tau_pool), pi_pool, stat)

            l_row = el_dist.pdf(data_j)
            M.l = l_row
            M.dist_prior = model.prior.compPrior[j].pdf(el_dist)

            L[j][(lead, )] = M

    g_wo_j = np.zeros((model.G, data.N), dtype='float64')
    best_indices = copy.copy(curr_indices)
    while 1:

        if not silent:
            print '\n----------------------------------'

        curr_indices[0] += 1

        if curr_indices[0] > max_ind:

            #curr_indices[lpos] = 0
            for e in range(model.dist_nr):
                if e == model.dist_nr - 1:
                    if curr_indices[e] > max_ind:
                        term = 1
                        break

                if curr_indices[e] > max_ind:
                    curr_indices[e] = 0
                    curr_indices[e + 1] += 1

        if term:
            break

        #print '\nprev:',prev_indices
        if not silent:
            print nr, ':', curr_indices, '->', [P[jj] for jj in curr_indices]

        g_wo_prev = copy.copy(g)
        g_this_struct = np.zeros((model.G, data.N))
        for j in range(model.dist_nr):
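            # curr_indices advances like an odometer, so the features whose
            # partition changed since the previous step always form a prefix;
            # once an unchanged feature is hit, the rest are unchanged too and
            # the update loop can stop early.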
            if prev_indices[j] == curr_indices[j]:
                #print '   -> unchanged',j,curr_indices[j], P[curr_indices[j]]
                break
            else:
                #print '\n--------\nChanged',j,curr_indices[j], P[curr_indices[j]]
                curr_struct_j = P[curr_indices[j]]

                # unnormalized posterior matrix without the contribution of the jth feature
                try:
                    g_wo_prev = g_wo_prev - l[j]
                except FloatingPointError:
                    # if there was an exception we have to compute each
                    # entry in g_wo_prev separately to set -inf - -inf = -inf
                    g_wo_prev = mixextend.substract_matrix(g_wo_prev, l[j])

                # extracting current feature from the DataSet
                if isinstance(model.components[0][j], MixtureModel):  # XXX
                    data_j = data.singleFeatureSubset(j)
                else:
                    data_j = data.getInternalFeature(j)

                l_j_1 = np.zeros(
                    (model.G, data.N))  # XXX only needs to be allocated once

                #print '\n\n***', curr_struct_j

                for cs_j in curr_struct_j:

                    #print '    ->',cs_j

                    if cs_j in L[j]:
                        #print '  ** REcomp',cs_j

                        # retrieve merge data from history
                        candidate_dist = L[j][cs_j].dist

                        if not silent:
                            print j, "  R  candidate:", cs_j, candidate_dist

                        l_row = L[j][cs_j].l
                        #cdist_prior = L[j][cs_j].dist_prior

                    else:
                        #print '  ** comp',cs_j

                        M = model.prior.compPrior[j].mapMStepMerge(
                            [L[j][(c, )] for c in cs_j])

                        #print '\n   *** compute:',hist_ind1,hist_ind2

                        candidate_dist = M.dist

                        if not silent:
                            print j, "  C  candidate:", cs_j, candidate_dist

                        l_row = candidate_dist.pdf(data_j)

                        #print '   l_row=',l_row

                        #cdist_prior =

                        M.l = l_row
                        M.dist_prior = model.prior.compPrior[j].pdf(
                            candidate_dist)

                        L[j][cs_j] = M

                    for c in cs_j:
                        l_j_1[c, :] = l_row
                        #print '            ->',c
                        #g_this_struct[c,:] += l_row

                g_this_struct += l_j_1
                l[j] = l_j_1

                # compute parameter prior for the candidate merge parameters
                log_prior_list_j = 0.0
                for r in curr_struct_j:
                    log_prior_list_j += L[j][r].dist_prior * len(r)
                log_prior_list[j] = log_prior_list_j

        # get updated unnormalized posterior matrix
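        # (the likelihood rows of every changed feature were subtracted into
        # g_wo_prev above and are replaced here by the candidate's rows)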
        g_1 = g_wo_prev + g_this_struct

        #                print '\ng_wo_j:'
        #                for gg in g_wo_j:
        #                    print gg.tolist()
        #
        #                print '\nl_j_1:'
        #                for gg in l_j_1:
        #                    print gg.tolist()
        #
        #
        #        print '\ng_1:'
        #        for gg in g_1:
        #            print gg.tolist()

        sum_logs = matrix_sum_logs(g_1)
        lk_1 = np.sum(sum_logs)

        #print '\n  *** likelihood =', lk_1

        # computing posterior as model selection criterion
        log_prior_1 = pi_prior

        #print r, L[r].dist_prior * len(r),L[r].dist_prior

        #print '\nlog_prior_list_j =',log_prior_list_j
        #print 'log_prior_list',log_prior_list

        log_prior_1 += sum(log_prior_list)

        #print '2:',log_prior_1

        # prior over number of components
        log_prior_1 += model.prior.nrCompPrior * model.G
        # prior over number of distinct groups
        for z in range(model.dist_nr):
            log_prior_1 += model.prior.structPrior * len(P[curr_indices[z]])

        #print '3:',log_prior_1

        post_1 = lk_1 + log_prior_1

        if not silent:
            print '\nPosterior:', post_1, '=', lk_1, '+', log_prior_1

        if post_1 >= best_post:  # current candidate structure is better than previous best
            if not silent:
                print "*** New best candidate", post_1, ">=", best_post

            if post_1 == best_post:
                print '******* Identical maxima!'
                print 'current:', curr_indices
                print 'best:', best_indices

            best_indices = copy.copy(curr_indices)
            best_post = post_1

        nr += 1
        g = g_1  # set likelihood matrix for the next candidate structure

        # XXX DEBUG XXX
        #if nr > 500:
        #    term = 1

        prev_indices = copy.copy(curr_indices)

    # setting updated structure in model
    for j in range(model.dist_nr):
        lead = []
        groups = {}

        #print j,best_indices[j]

        best_partition = P[best_indices[j]]
        for gr in best_partition:
            gr_list = list(gr)
            gr_lead = gr_list.pop(0)
            lead.append(gr_lead)
            groups[gr_lead] = gr_list

            # assigning distributions according to new structure
            model.components[gr_lead][j] = L[j][gr].dist
            for d in gr_list:
                model.components[d][j] = model.components[gr_lead][j]

        model.leaders[j] = lead
        model.groups[j] = groups

    #    print '** G=',model.G
    #    print '** p=',model.dist_nr
    #    print '** nr =',nr

    if not silent:
        print '\n*** Globally optimal structure out of', nr, 'possible:'
        print [P[best_indices[j]] for j in range(model.dist_nr)]
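
A minimal driver sketch combining both examples; it assumes the model object provides PyMix's sampleDataSet method to generate a matching DataSet:

# build a random CSI mixture and sample synthetic data from it
m = getRandomCSIMixture_conditionalDists(3, 4, 0.1, 5.0, M=8)
data = m.sampleDataSet(500)

# exhaustive CSI structure search over all partition combinations; with
# silent=0 the candidate scores and the optimal structure are printed
updateStructureBayesianFullEnumeration(m, data, objFunction='MAP', silent=0)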