def break_b_stick(models, sample, params):
    """Grow every b-dependent model by one b value.

    Increments the module-level ``b_max``, breaks the beta stick held by
    ``models.cont`` (sharing the resulting beta with ``models.start``), and
    then enlarges the tables that are indexed by b:

    * ``models.cont`` and ``models.start`` each gain an output column;
    * ``models.pos`` gains one row;
    * ``models.cont`` and ``models.fork`` (``dist`` and ``pairCounts``) each
      gain a block of ``g_max`` rows at the end. The new ``dist`` rows are
      filled with freshly sampled log10 probabilities; the new ``pairCounts``
      rows stay zero.

    ``params`` is accepted for signature compatibility but is not read here.
    """
    global b_max

    b_max += 1

    ## The stick is broken on cont; start simply shares the resulting beta.
    break_beta_stick(models.cont, sample.gamma)
    models.start.beta = models.cont.beta

    ## Both distributions that output b get an extra column.
    for b_output_model in (models.cont, models.start):
        add_model_column(b_output_model)

    ## The POS output distribution depends only on b, so it gets one new row.
    pos_prior = sample.alpha_g * sample.beta_g[1:]
    add_model_row_simple(models.pos, pos_prior)

    def with_zero_block(table, width):
        """Return `table` with g_max all-zero rows of `width` columns appended."""
        return np.concatenate((table, np.zeros((g_max, width))), axis=0)

    ## cont and fork are conditioned on both b and g. g is the "inside"
    ## variable, so a new b value is simply one more block of g_max rows
    ## tacked onto the end of each table.
    cont = models.cont
    fork = models.fork
    cont.dist = with_zero_block(cont.dist, cont.dist.shape[1])
    cont.pairCounts = with_zero_block(cont.pairCounts, cont.pairCounts.shape[1])
    fork.dist = with_zero_block(fork.dist, 2)
    fork.pairCounts = with_zero_block(fork.pairCounts, 2)

    ## Fill in the distributions for the freshly appended block, one row
    ## per g value.
    first_new_row = (b_max - 1) * g_max
    for row in range(first_new_row, first_new_row + g_max):
        cont_prior = cont.alpha * cont.beta[1:]
        sampled_cont = np.log10(sampler.sampleSimpleDirichlet(cont_prior))
        ## Column 0 is given zero probability (log10 -> -inf).
        cont.dist[row, 0] = -np.inf
        cont.dist[row, 1:] = sampled_cont

        fork_prior = sample.alpha_f * sample.beta_f
        fork.dist[row, :] = np.log10(sampler.sampleSimpleBernoulli(fork_prior))
def break_g_stick(models, sample, params):
    """Grow every g-dependent model by one g (POS tag) value.

    Increments the module-level ``g_max``, breaks the beta stick held by
    ``models.pos``, and then enlarges the tables that involve g:

    * ``models.pos`` gains an output column;
    * ``models.lex`` gains a row, using ``params['h'][0,1:]`` as the argument
      to ``add_model_row_simple``;
    * ``models.root`` gains a row for the new conditional value of g;
    * ``models.cont`` and ``models.fork`` (``dist`` and ``pairCounts``) are
      rebuilt with ``b_max * g_max`` rows. These tables are laid out as one
      block of g rows per b value, so the old rows are copied block by block
      and a new row is opened at the end of every block. New ``dist`` rows are
      filled with freshly sampled log10 probabilities; new ``pairCounts``
      rows stay zero.

    An error is logged (execution continues) if the last element of the
    resampled ``models.pos.beta`` is exactly 0.0.
    """
    global g_max

    g_max += 1

    ## Resample beta when the stick is broken:
    break_beta_stick(models.pos, sample.gamma)
    if models.pos.beta[-1] == 0.0:
        logging.error("This shouldn't be 0!")

    ## Add a column to the distribution that outputs POS tags:
    add_model_column(models.pos)

    ## Add a row to the lexical distribution for this new POS tag:
    add_model_row_simple(models.lex, params['h'][0,1:])

    ## Add a row to the active (a) model for the new conditional value of g:
    add_model_row_simple(models.root, models.root.alpha * models.root.beta[1:])

    ## cont and fork depend on b as well as g. Their rows are grouped into
    ## one block per b, so the old tables cannot simply be extended: each old
    ## block of (g_max - 1) rows is copied into a new block of g_max rows,
    ## leaving a gap at the end of every block for the new g value.
    prev_g = g_max - 1
    num_rows = b_max * g_max

    old_cont_counts = models.cont.pairCounts
    old_cont_dist = models.cont.dist
    old_fork_counts = models.fork.pairCounts
    old_fork_dist = models.fork.dist

    models.cont.pairCounts = np.zeros((num_rows, b_max))
    models.cont.dist = np.zeros((num_rows, b_max))
    models.fork.pairCounts = np.zeros((num_rows, 2))
    models.fork.dist = np.zeros((num_rows, 2))

    ## The priors are not modified inside the loop, so compute them once.
    cont_prior = models.cont.alpha * models.cont.beta[1:]
    fork_prior = sample.alpha_f * sample.beta_f

    for b in range(b_max):
        ## Rows of block b in the old layout and in the new layout.
        old_block = slice(b * prev_g, (b + 1) * prev_g)
        new_start = b * g_max
        new_row = new_start + prev_g
        new_block = slice(new_start, new_row)

        models.cont.pairCounts[new_block, :] = old_cont_counts[old_block, :]
        models.cont.dist[new_block, :] = old_cont_dist[old_block, :]
        ## Column 0 is given zero probability (log10 -> -inf).
        models.cont.dist[new_row, 0] = -np.inf
        models.cont.dist[new_row, 1:] = np.log10(sampler.sampleSimpleDirichlet(cont_prior))

        models.fork.pairCounts[new_block, :] = old_fork_counts[old_block, :]
        models.fork.dist[new_block, :] = old_fork_dist[old_block, :]
        models.fork.dist[new_row, :] = np.log10(sampler.sampleSimpleBernoulli(fork_prior))