def gibbs_sample_fixed_k(domain_inf, rng, impotent=False):
    """
    One Gibbs sweep over the entities of a domain that only ever chooses
    among the groups returned by domain_inf.get_groups(); this function
    itself never creates or deletes a group.

    domain_inf: domain object; must provide entity_count,
        remove_entity_from_group, get_groups, post_pred and
        add_entity_to_group
    rng: not used in this function (the visiting order is drawn from
        np.random); kept in the signature so callers can pass it
    impotent: if true, every entity is put back into the group it was
        removed from. The scores are still computed and a sample is
        still drawn, only the result is discarded.
    """
    T_N = domain_inf.entity_count()

    if impotent:
        # name this function, not gibbs_sample_type, so the output
        # identifies which sampler is actually running
        print("gibbs_sample_fixed_k: IMPOTENT")

    # visit the entities in a fresh random order
    for entity_pos in np.random.permutation(T_N):
        # g is the group the entity belonged to before removal
        g = domain_inf.remove_entity_from_group(entity_pos)

        groups = domain_inf.get_groups()
        scores = np.zeros(len(groups))
        for gi, group_id in enumerate(groups):
            scores[gi] = domain_inf.post_pred(group_id, entity_pos)

        sample_i = util.sample_from_scores(scores)
        new_group = groups[sample_i]
        if impotent:
            new_group = g

        domain_inf.add_entity_to_group(new_group, entity_pos)
def gibbs_sample_type(domain_inf, rng, impotent=False):
    """
    One Gibbs sweep over the entities of a domain in which each entity
    may also land in one otherwise-empty group.

    For every entity (visited in an np.random permutation) the entity is
    removed from its group, one empty "scratch" group is made available
    (the group it just vacated if that is now empty, otherwise a newly
    created one), all groups are scored with post_pred_map and a
    destination is drawn with util.sample_from_scores. The scratch group
    is deleted again unless the entity was placed in it.

    impotent: if true, the entity always goes back to the group it came
        from; scoring and sampling still run.
    """
    n_entities = domain_inf.entity_count()

    if impotent:
        print("gibbs_sample_type: IMPOTENT")

    for entity_pos in np.random.permutation(n_entities):
        origin = domain_inf.remove_entity_from_group(entity_pos)

        # reuse the vacated group as the empty candidate when possible
        vacated = domain_inf.group_size(origin) == 0
        scratch = origin if vacated else domain_inf.create_group(rng)

        candidates = domain_inf.get_groups()
        scores = domain_inf.post_pred_map(candidates, entity_pos)
        drawn = util.sample_from_scores(scores)

        destination = candidates[drawn]
        if impotent:
            destination = origin

        domain_inf.add_entity_to_group(destination, entity_pos)

        if destination != scratch:
            # the scratch group was not chosen, so it must still be empty
            assert domain_inf.group_size(scratch) == 0
            domain_inf.delete_group(scratch)
def relation_hp_grid(model, rng, grids, threadpool=None):
    """
    Resample the hyperparameters of every relation in the model from a
    grid of candidate values.

    model: object whose .relations maps relation name -> relation
    rng: not used in this function
    grids: dict of candidate hyperparameter grids. A grid is looked up
        first by relation name and then by the relation's modeltypestr,
        which allows per-relation grids to override per-model-type ones.
        If the grid is None, no inference is done for that relation.
    threadpool: passed through to relation.score_at_hps

    Raises RuntimeError if neither the relation name nor its model type
    has an entry in grids.
    """
    for relation_name, relation in model.relations.iteritems():
        model_name = relation.modeltypestr
        if relation_name in grids:
            grid = grids[relation_name]
        elif model_name in grids:
            grid = grids[model_name]
        else:
            raise RuntimeError("model %s is not in the provided grids" % model_name)

        # identity test: '== None' is evaluated elementwise for
        # array-like grids and does not yield a usable truth value
        if grid is None:
            continue

        if isinstance(relation, pyirmutil.Relation):
            ## THIS IS A TOTAL HACK we should not be dispatching this way
            ## fix in later version once we obsolete old code
            def set_func(val):
                relation.set_hps(val)

            def get_score():
                return relation.total_score()

            # both closures are consumed within this iteration, so they
            # always refer to the current relation
            gridgibbshps.grid_gibbs(set_func, get_score, grid)
        else:
            scores = relation.score_at_hps(grid, threadpool)
            i = util.sample_from_scores(scores)
            relation.set_hps(grid[i])
def gibbs_sample_type_nonconj(domain_inf, M, rng, impotent=False, threadpool = None):
    """
    Radford Neal's Algorithm 8 for non-conjugate models.

    M is the number of ephemeral (empty) clusters offered to each entity.

    We assume that every cluster in the model is currently in use.

    impotent: if true, we always assign the object back to its original
    cluster. Useful for benchmarking.

    threadpool is handed to domain_inf.post_pred_map.
    """
    n_entities = domain_inf.entity_count()

    if impotent:
        print("gibbs_sample_type_nonconj IMPOTENT")

    if n_entities == 1:
        return  # nothing to do

    for entity_pos in range(n_entities):
        origin = domain_inf.remove_entity_from_group(entity_pos)

        # a group emptied by the removal counts as the first ephemeral one
        if domain_inf.group_size(origin) == 0:
            ephemeral = [origin]
        else:
            ephemeral = []
        # top up with new groups until there are M ephemeral groups
        while len(ephemeral) < M:
            ephemeral.append(domain_inf.create_group(rng))

        candidates = domain_inf.get_groups()
        scores = domain_inf.post_pred_map(candidates, entity_pos, threadpool)

        # correct the score for the empty groups
        for idx, candidate in enumerate(candidates):
            if candidate in ephemeral:
                scores[idx] -= np.log(M)

        drawn = util.sample_from_scores(scores)
        destination = origin if impotent else candidates[drawn]

        domain_inf.add_entity_to_group(destination, entity_pos)

        # discard the ephemeral groups that are still empty
        for leftover in ephemeral:
            if domain_inf.group_size(leftover) == 0:
                domain_inf.delete_group(leftover)
def sequential_init(model, rng, M=10): """ This is a sequential gibbs-style initialization. We require a model to be fully specified before we do this. Note that we obliterate all existing structural state -- components, suffstats, etc. To handle the multidomain case, we randomly switch between domains as we do the sequential build-up. Note we do neal-algo-8-style creation of ephemeral groups here """ for domain_name, domain_obj in model.domains.iteritems(): irmio.empty_domain(domain_obj) d_o_map = {} # develop ordering for domain_name, domain_obj in model.domains.iteritems(): unassigned_objs = np.random.permutation(domain_obj.entity_count()).tolist() d_o_map[domain_name] = unassigned_objs # now create a single group for everyone for domain_name, domain_obj in model.domains.iteritems(): g = domain_obj.create_group(rng) domain_obj.add_entity_to_group(g, d_o_map[domain_name].pop()) # now each domain has exactly one currently-assigned group # flatten the ordering, shuffle all_ent = [] for dn, do in d_o_map.iteritems(): all_ent += [(dn, di) for di in do] np.random.shuffle(all_ent) for domain_name, entity_pos in all_ent: domain_obj = model.domains[domain_name] extra_groups = [domain_obj.create_group(rng) for _ in range(M)] groups = domain_obj.get_groups() scores = np.zeros(len(groups)) for gi, group_id in enumerate(groups): scores[gi] = domain_obj.post_pred(group_id, entity_pos) # correct the score for the empty groups if group_id in extra_groups: scores[gi] -= np.log(M) #print entity_pos, scores sample_i = util.sample_from_scores(scores) new_group = groups[sample_i] domain_obj.add_entity_to_group(new_group, entity_pos) for eg in extra_groups: if domain_obj.group_size(eg) == 0: domain_obj.delete_group(eg) # now the model init should ... be good # debug for domain_name, domain_obj in model.domains.iteritems(): print domain_name, "groups:", i = 0 for g in domain_obj.get_groups(): j = domain_obj.group_size(g) print j, i += j print print "total entities", i