def break_b_stick(models, sample, params):
    """Grow every structure indexed by b after one more b value is introduced.

    Increments the module-level ``b_max``, breaks the beta stick held on
    ``models.cont`` (sharing the result with ``models.start``), adds a column
    to the cont and start models, adds a row to the pos model, and appends a
    block of ``g_max`` freshly sampled rows to the cont and fork models.

    ``params`` is accepted but not used here.
    """
    global a_max, b_max, g_max
    b_max += 1

    cont = models.cont
    fork = models.fork

    ## The stick is kept on cont; start simply points at the same beta.
    break_beta_stick(cont, sample.gamma)
    models.start.beta = cont.beta

    ## Both of these models get one extra column.
    add_model_column(cont)
    add_model_column(models.start)

    ## The POS output distribution depends only on b, so one new row suffices.
    add_model_row_simple(models.pos, sample.alpha_g * sample.beta_g[1:])

    ## Cont and Fork are conditioned on both b and g.  Because g is the
    ## "inside" variable, a new b value just means one more block of g_max
    ## rows at the end (breaking the g stick instead has to insert rows
    ## intermittently).
    cont.dist = np.concatenate(
        (cont.dist, np.zeros((g_max, cont.dist.shape[1]))), axis=0)
    cont.pairCounts = np.concatenate(
        (cont.pairCounts, np.zeros((g_max, cont.pairCounts.shape[1]))), axis=0)
    fork.dist = np.concatenate((fork.dist, np.zeros((g_max, 2))), axis=0)
    fork.pairCounts = np.concatenate(
        (fork.pairCounts, np.zeros((g_max, 2))), axis=0)

    ## Fill in the appended block, one row per g value.
    first_new_row = (b_max - 1) * g_max
    for row in range(first_new_row, first_new_row + g_max):
        log_cont = np.log10(
            sampler.sampleSimpleDirichlet(cont.alpha * cont.beta[1:]))
        cont.dist[row, 0] = -np.inf
        cont.dist[row, 1:] = log_cont

        log_fork = np.log10(
            sampler.sampleSimpleBernoulli(sample.alpha_f * sample.beta_f))
        fork.dist[row, :] = log_fork
def add_model_row_simple(model, base):
    """Append one new conditioning row to ``model``.

    A row of zeros is appended to ``model.pairCounts`` and a new row is
    appended to ``model.dist``.  Column 0 of the new ``dist`` row is set to
    ``-inf``; the remaining columns hold the log10 of a draw from
    ``sampler.sampleSimpleDirichlet`` whose parameter is the new (all-zero)
    count row plus ``base``.

    Args:
        model: object exposing 2-D ``dist`` and ``pairCounts`` arrays with
            the same number of columns; both attributes are rebound to the
            enlarged arrays.
        base: values added to the new count row (excluding column 0) to form
            the Dirichlet parameter; must broadcast against
            ``model.pairCounts[-1, 1:]``.

    An error is logged (nothing is raised) if ``model.dist`` contains NaN
    after the row has been added.
    """
    num_outs = model.dist.shape[1]

    model.pairCounts = np.append(model.pairCounts, np.zeros((1, num_outs)), 0)
    model.dist = np.append(model.dist, np.zeros((1, num_outs)), 0)

    ## Column 0 gets log-probability -inf; the rest are sampled.
    model.dist[-1, 0] = -np.inf
    model.dist[-1, 1:] = np.log10(
        sampler.sampleSimpleDirichlet(model.pairCounts[-1, 1:] + base))

    ## The whole table is checked, not only the new row.
    if np.isnan(model.dist).any():
        logging.error("Addition of row resulted in nan!")
def break_a_stick(models, sample, params):
    """Grow every structure indexed by a after one more a value is introduced.

    Increments the module-level ``a_max``, breaks the beta stick held on
    ``models.root`` (sharing the result with ``models.act``), adds a column
    to the root and act models, adds a row to the act and reduce models, and
    rebuilds the start model's tables at ``a_max * a_max`` rows, copying the
    old rows into place and sampling the newly created ones.

    ``params`` is accepted but not used here.
    """
    global a_max, b_max, g_max
    a_max += 1

    root = models.root
    act = models.act
    reduce_model = models.reduce
    start = models.start

    ## The a stick is stored on root by convention.
    ## TODO -- move this out to its own class later.
    break_beta_stick(root, sample.gamma)
    act.beta = root.beta

    ## ACT and ROOT each get one more column.
    add_model_column(root)
    add_model_column(act)

    ## TODO: add a row to the j distribution.

    ## ACT is conditioned on a_{t-1}, so it also needs a new row.
    add_model_row_simple(act, act.alpha * act.beta[1:])

    ## Boolean-valued model: the simple row add does not apply, so append a
    ## zero count row and a uniform [0.5, 0.5] row (in log10) by hand.
    reduce_model.pairCounts = np.concatenate(
        (reduce_model.pairCounts, np.zeros((1, 2))), axis=0)
    uniform_row = np.log10([[0.5, 0.5]])
    reduce_model.dist = np.concatenate((reduce_model.dist, uniform_row), axis=0)

    ## Start is conditioned on (a_{t-1}, a_t): swap in fresh, larger tables
    ## and keep the old ones around to copy from.
    prev_counts = start.pairCounts
    prev_dist = start.dist
    start.pairCounts = np.zeros((a_max * a_max, b_max))
    start.dist = np.zeros((a_max * a_max, b_max))

    ## Since both conditioning variables are 'a', only the first a_max - 1
    ## blocks have old content.  Each takes its a_max - 1 old rows plus one
    ## newly sampled row; the entirely new last block is handled afterwards.
    kept = a_max - 1
    for block in range(kept):
        dst = block * a_max
        src = block * kept
        new_row = dst + kept

        start.pairCounts[dst:new_row, :] = prev_counts[src:src + kept, :]
        start.dist[dst:new_row, :] = prev_dist[src:src + kept, :]

        start.dist[new_row, 0] = -np.inf
        start.dist[new_row, 1:] = np.log10(
            sampler.sampleSimpleDirichlet(sample.alpha_b * sample.beta_b[1:]))

    ## The final block is all new: sample each of its a_max rows.
    last_block = a_max * kept
    for row in range(last_block, last_block + a_max):
        start.dist[row, 0] = -np.inf
        start.dist[row, 1:] = np.log10(
            sampler.sampleSimpleDirichlet(sample.alpha_b * sample.beta_b[1:]))
def break_g_stick(models, sample, params):
    """Grow every structure indexed by g after one more g value is introduced.

    Increments the module-level ``g_max``, breaks the beta stick held on
    ``models.pos``, adds a column to the pos model, adds a row to the lex and
    root models, and rebuilds the cont and fork tables at ``b_max * g_max``
    rows, copying the old rows into place and sampling one new row at the end
    of each per-b block.

    ``params['h'][0, 1:]`` supplies the base for the new lex row.
    """
    global a_max, b_max, g_max
    g_max += 1

    ## NOTE(review): not referenced again in the visible part of this function.
    num_conds = models.pos.dist.shape[0]

    ## Breaking the stick resamples beta on the pos model.
    break_beta_stick(models.pos, sample.gamma)
    if models.pos.beta[-1] == 0.0:
        logging.error("This shouldn't be 0!")

    ## The distribution that outputs POS tags gets a new column.
    add_model_column(models.pos)

    ## The lexical distribution gets a row for the new POS tag.
    add_model_row_simple(models.lex, params['h'][0,1:])

    ## The root model gets a row for the new conditioning value of g.
    add_model_row_simple(models.root, models.root.alpha * models.root.beta[1:])

    ## Cont and Fork depend on b as well as g (trans is not used yet), so a
    ## plain append is not enough: old rows are copied in slices into larger
    ## tables, leaving a gap at the end of every per-b block.
    cont = models.cont
    fork = models.fork

    prev_cont_counts = cont.pairCounts
    prev_cont_dist = cont.dist
    prev_fork_counts = fork.pairCounts
    prev_fork_dist = fork.dist

    total_rows = b_max * g_max
    cont.pairCounts = np.zeros((total_rows, b_max))
    cont.dist = np.zeros((total_rows, b_max))
    fork.pairCounts = np.zeros((total_rows, 2))
    fork.dist = np.zeros((total_rows, 2))

    ## Each block keeps its g_max - 1 old rows and gains one sampled row.
    ## Fork is read with the same source offset as cont.
    kept = g_max - 1
    for block in range(b_max):
        dst = block * g_max
        src = block * kept
        new_row = dst + kept

        cont.pairCounts[dst:new_row, :] = prev_cont_counts[src:src + kept, :]
        cont.dist[dst:new_row, :] = prev_cont_dist[src:src + kept, :]
        cont.dist[new_row, 0] = -np.inf
        cont.dist[new_row, 1:] = np.log10(
            sampler.sampleSimpleDirichlet(cont.alpha * cont.beta[1:]))

        fork.pairCounts[dst:new_row, :] = prev_fork_counts[src:src + kept, :]
        fork.dist[dst:new_row, :] = prev_fork_dist[src:src + kept, :]
        fork.dist[new_row, :] = np.log10(
            sampler.sampleSimpleBernoulli(sample.alpha_f * sample.beta_f))