Esempio n. 1
0
def gibbs_sample_fixed_k(domain_inf, rng, impotent=False):
    """
    One Gibbs sweep over the entity-to-group assignments of a domain
    that only moves entities between the groups that already exist.

    Entities are visited in a random order drawn from numpy's global
    random state. Each one is removed from its group, scored against
    every group returned by domain_inf.get_groups() with
    domain_inf.post_pred, and added to a group sampled from those
    scores. This function never calls create_group or delete_group.

    domain_inf: object providing entity_count, remove_entity_from_group,
    get_groups, post_pred and add_entity_to_group

    rng: not used by this function; kept so the signature matches the
    other samplers

    impotent: if true, every entity is put back into the group it was
    removed from. Scoring and sampling still run.

    """
    T_N = domain_inf.entity_count()

    if impotent:
        # name this sampler (not gibbs_sample_type) in the diagnostic
        print("gibbs_sample_fixed_k: IMPOTENT")

    for entity_pos in np.random.permutation(T_N):
        g = domain_inf.remove_entity_from_group(entity_pos)

        groups = domain_inf.get_groups()
        scores = np.zeros(len(groups))
        for gi, group_id in enumerate(groups):
            scores[gi] = domain_inf.post_pred(group_id, entity_pos)
        sample_i = util.sample_from_scores(scores)
        new_group = groups[sample_i]

        if impotent:
            new_group = g

        domain_inf.add_entity_to_group(new_group, entity_pos)
Esempio n. 2
0
def gibbs_sample_type(domain_inf, rng, impotent=False):
    """
    One Gibbs sweep over the entity-to-group assignments of a domain
    in which each entity is also offered one empty group.

    Entities are visited in a random order drawn from numpy's global
    random state. After an entity is removed, an empty group is made
    available: the entity's old group if the removal emptied it,
    otherwise a group freshly created with rng. All groups are scored
    with domain_inf.post_pred_map and the destination is sampled from
    those scores. If the entity does not land in the offered empty
    group, that group is deleted again.

    impotent: if true, every entity is put back into the group it was
    removed from. Scoring and sampling still run.

    """
    entity_total = domain_inf.entity_count()

    if impotent:
        print("gibbs_sample_type: IMPOTENT")

    for entity_pos in np.random.permutation(entity_total):
        origin = domain_inf.remove_entity_from_group(entity_pos)

        # reuse the origin as the empty candidate when the removal
        # emptied it, otherwise create a new group for that role
        spare = (origin if domain_inf.group_size(origin) == 0
                 else domain_inf.create_group(rng))

        candidates = domain_inf.get_groups()
        candidate_scores = domain_inf.post_pred_map(candidates, entity_pos)
        picked = util.sample_from_scores(candidate_scores)
        destination = candidates[picked]
        if impotent:
            destination = origin

        domain_inf.add_entity_to_group(destination, entity_pos)

        # the spare went unused, so it must still be empty: drop it
        if destination != spare:
            assert domain_inf.group_size(spare) == 0
            domain_inf.delete_group(spare)
Esempio n. 3
0
def relation_hp_grid(model, rng, grids, threadpool=None):
    """
    Resample the hyperparameters of every relation in the model from a
    grid of candidate values.

    The grid for a relation is looked up in grids by relation name
    first and by the relation's modeltypestr second, so a per-relation
    grid takes precedence over a per-model-type one.

    If the grid is None, don't do inference for that relation.

    model: object whose relations attribute maps relation names to
    relation objects

    rng: not used by this function

    grids: mapping from relation name or model type string to a grid
    (or None)

    threadpool: passed to relation.score_at_hps for relations that are
    not pyirmutil.Relation instances

    Raises RuntimeError if grids has an entry for neither the relation
    name nor its model type.

    """

    for relation_name, relation in model.relations.iteritems():
        model_name = relation.modeltypestr
        if relation_name in grids:
            grid = grids[relation_name]
        elif model_name in grids:
            grid = grids[model_name]
        else:
            raise RuntimeError("model %s is not in the provided grids" % model_name)

        # identity test on purpose: `grid == None` compares elementwise
        # when the grid is a numpy array and is then not a usable
        # truth value
        if grid is None:
            continue

        if isinstance(relation, pyirmutil.Relation):
            ## THIS IS A TOTAL HACK we should not be dispatching this way
            ## fix in later version once we obsolete old code

            # bind the current relation explicitly so the callbacks do
            # not depend on the loop variable
            def set_func(val, relation=relation):
                relation.set_hps(val)

            def get_score(relation=relation):
                return relation.total_score()

            gridgibbshps.grid_gibbs(set_func, get_score, grid)
        else:
            scores = relation.score_at_hps(grid, threadpool)
            i = util.sample_from_scores(scores)
            relation.set_hps(grid[i])
Esempio n. 4
0
def gibbs_sample_type_nonconj(domain_inf, M, rng, impotent=False,
                              threadpool=None):
    """
    Gibbs sweep over entity-to-group assignments for non-conjugate
    models, after Radford Neal's Algorithm 8.

    M is the number of ephemeral (empty) groups offered to each entity.

    Every group in the model is assumed to be in use when this is
    called.

    impotent: if true, each entity is always put back into the group
    it was removed from. Useful for benchmarking.

    threadpool: forwarded to domain_inf.post_pred_map

    """
    n_entities = domain_inf.entity_count()

    if impotent:
        print("gibbs_sample_type_nonconj IMPOTENT")

    if n_entities == 1:
        return  # nothing to do

    for entity_pos in range(n_entities):
        home = domain_inf.remove_entity_from_group(entity_pos)

        # the home group counts as one of the M ephemeral groups when
        # the removal emptied it; the rest are created fresh
        ephemeral = [home] if domain_inf.group_size(home) == 0 else []
        while len(ephemeral) < M:
            ephemeral.append(domain_inf.create_group(rng))

        candidates = domain_inf.get_groups()
        scores = domain_inf.post_pred_map(candidates, entity_pos, threadpool)

        # correct the score for the empty groups
        for slot, candidate in enumerate(candidates):
            if candidate in ephemeral:
                scores[slot] -= np.log(M)

        drawn = util.sample_from_scores(scores)
        destination = home if impotent else candidates[drawn]
        domain_inf.add_entity_to_group(destination, entity_pos)

        # discard whichever ephemeral groups are still empty
        for spare in ephemeral:
            if domain_inf.group_size(spare) == 0:
                domain_inf.delete_group(spare)
Esempio n. 5
0
def sequential_init(model, rng, M=10):
    """
    This is a sequential gibbs-style initialization. We require a model
    to be fully specified before we do this. Note that we obliterate
    all existing structural state -- components, suffstats, etc. 

    To handle the multidomain case, we randomly switch between domains
    as we do the sequential build-up. 

    Note we do neal-algo-8-style creation of ephemeral groups here

    """
    for domain_name, domain_obj in model.domains.iteritems():
        irmio.empty_domain(domain_obj)
    d_o_map = {}
    # develop ordering 
    for domain_name, domain_obj in model.domains.iteritems():
        unassigned_objs = np.random.permutation(domain_obj.entity_count()).tolist()
        d_o_map[domain_name] = unassigned_objs
    
    # now create a single group for everyone
    for domain_name, domain_obj in model.domains.iteritems():
        g = domain_obj.create_group(rng)
        domain_obj.add_entity_to_group(g, d_o_map[domain_name].pop())

    # now each domain has exactly one currently-assigned group
    
    # flatten the ordering, shuffle
    all_ent = []
    for dn, do in d_o_map.iteritems():
        all_ent += [(dn, di) for di in do]
    np.random.shuffle(all_ent)
    
    for domain_name, entity_pos in all_ent:
        domain_obj = model.domains[domain_name]

        extra_groups = [domain_obj.create_group(rng) for _ in range(M)]

        groups = domain_obj.get_groups()
        scores = np.zeros(len(groups))
        
        for gi, group_id in enumerate(groups):
            scores[gi] = domain_obj.post_pred(group_id, entity_pos)
            # correct the score for the empty groups
            if group_id in extra_groups:
                scores[gi] -= np.log(M)
        #print entity_pos, scores
        sample_i = util.sample_from_scores(scores)
        new_group = groups[sample_i]

        domain_obj.add_entity_to_group(new_group, entity_pos)
        for eg in extra_groups:
            if domain_obj.group_size(eg) == 0:
                domain_obj.delete_group(eg)
    
    # now the model init should ... be good
    # debug
    for domain_name, domain_obj in model.domains.iteritems():
        print domain_name, "groups:", 
        i = 0
        for g in domain_obj.get_groups():
            j = domain_obj.group_size(g)
            print j, 
            i += j
        print
        print "total entities", i