Ejemplo n.º 1
0
def break_b_stick(models, sample, params):
    """Extend the models by one additional value of ``b``.

    Increments the module-level ``b_max`` and then grows every table that is
    indexed by ``b``:

    * the beta stick of ``models.cont`` is broken via ``break_beta_stick`` and
      ``models.start.beta`` is bound to the same beta object (no copy is made);
    * ``models.cont`` and ``models.start`` each get a new column, and
      ``models.pos`` gets a new row, through the file's helper functions;
    * ``g_max`` rows are appended to the ``dist`` and ``pairCounts`` tables of
      ``models.cont`` and ``models.fork``. The appended ``pairCounts`` rows
      stay zero; the appended ``dist`` rows are filled with log10 of freshly
      sampled values, with column 0 of ``cont.dist`` forced to ``-inf``.

    ``params`` is accepted but not used by this function. Nothing is returned;
    all changes are made in place on ``models`` and on ``b_max``.
    """
    global b_max

    b_max += 1

    cont = models.cont
    fork = models.fork

    ## The stick is broken on cont; start then shares that same beta.
    break_beta_stick(cont, sample.gamma)
    models.start.beta = cont.beta

    ## Both output distributions over b gain a column.
    add_model_column(cont)
    add_model_column(models.start)

    ## POS is conditioned on b alone, so it simply gains one row.
    add_model_row_simple(models.pos, sample.alpha_g * sample.beta_g[1:])

    ## Fork and Cont are conditioned on (b, g) with g as the inner index, so a
    ## new b value is one contiguous block of g_max rows at the bottom of each
    ## table (breaking the g stick, by contrast, interleaves new rows).
    def _pad_rows(table, width):
        return np.append(table, np.zeros((g_max, width)), 0)

    cont.dist = _pad_rows(cont.dist, cont.dist.shape[1])
    cont.pairCounts = _pad_rows(cont.pairCounts, cont.pairCounts.shape[1])
    fork.dist = _pad_rows(fork.dist, 2)
    fork.pairCounts = _pad_rows(fork.pairCounts, 2)

    ## Fill the freshly appended block, one row per value of g. The sampler is
    ## called for cont first and fork second on every row.
    block_start = (b_max - 1) * g_max
    for row in range(block_start, block_start + g_max):
        log_cont = np.log10(sampler.sampleSimpleDirichlet(cont.alpha * cont.beta[1:]))
        cont.dist[row, 0] = -np.inf
        cont.dist[row, 1:] = log_cont

        log_fork = np.log10(sampler.sampleSimpleBernoulli(sample.alpha_f * sample.beta_f))
        fork.dist[row, :] = log_fork
Ejemplo n.º 2
0
def break_g_stick(models, sample, params):
    """Extend the models by one additional value of ``g``.

    Increments the module-level ``g_max`` and then grows every table that is
    indexed by ``g``:

    * the beta stick of ``models.pos`` is broken via ``break_beta_stick``; an
      error is logged if the last beta entry is exactly ``0.0``;
    * ``models.pos`` gets a new column, and ``models.lex`` and ``models.root``
      each get a new row, through the file's helper functions
      (``params['h'][0, 1:]`` is the argument passed for the lex row);
    * the ``dist`` and ``pairCounts`` tables of ``models.cont`` and
      ``models.fork`` are rebuilt with ``b_max * g_max`` rows. Rows are laid
      out as ``b * g_max + g``, so each block of old rows is copied into the
      front of its new, one-row-longer block. The extra row at the end of each
      block keeps zero ``pairCounts`` and gets log10 of freshly sampled values
      in ``dist`` (column 0 of ``cont.dist`` is forced to ``-inf``).

    The rebuilt cont tables have ``b_max`` columns and the fork tables have 2.
    ``b_max`` is read from module scope. Nothing is returned; all changes are
    made in place on ``models`` and on ``g_max``.
    """
    global g_max

    g_max += 1
    prev_g_max = g_max - 1

    ## Resample beta when the stick is broken:
    break_beta_stick(models.pos, sample.gamma)

    if models.pos.beta[-1] == 0.0:
        logging.error("This shouldn't be 0!")

    ## Add a column to the distribution that outputs POS tags:
    add_model_column(models.pos)

    ## Add a row to the lexical distribution for this new POS tag:
    add_model_row_simple(models.lex, params['h'][0,1:])

    ## Add a row to the active (a) model for the new conditional value of g
    add_model_row_simple(models.root, models.root.alpha * models.root.beta[1:])

    ## Cont and Fork are conditioned on (b, g) with g as the inner index, so the
    ## new rows land in the interior of each table: rebuild the tables and copy
    ## the old contents across block by block.
    cont = models.cont
    fork = models.fork
    total_rows = b_max * g_max

    old_cont_counts, old_cont_dist = cont.pairCounts, cont.dist
    cont.pairCounts = np.zeros((total_rows, b_max))
    cont.dist = np.zeros((total_rows, b_max))

    old_fork_counts, old_fork_dist = fork.pairCounts, fork.dist
    fork.pairCounts = np.zeros((total_rows, 2))
    fork.dist = np.zeros((total_rows, 2))

    for b in range(b_max):
        ## Old block b has prev_g_max rows; new block b has g_max rows.
        old_start = b * prev_g_max
        new_start = b * g_max
        old_rows = slice(old_start, old_start + prev_g_max)
        kept_rows = slice(new_start, new_start + prev_g_max)
        new_row = new_start + prev_g_max

        cont.pairCounts[kept_rows, :] = old_cont_counts[old_rows, :]
        cont.dist[kept_rows, :] = old_cont_dist[old_rows, :]
        cont.dist[new_row, 0] = -np.inf
        cont.dist[new_row, 1:] = np.log10(sampler.sampleSimpleDirichlet(cont.alpha * cont.beta[1:]))

        fork.pairCounts[kept_rows, :] = old_fork_counts[old_rows, :]
        fork.dist[kept_rows, :] = old_fork_dist[old_rows, :]
        fork.dist[new_row, :] = np.log10(sampler.sampleSimpleBernoulli(sample.alpha_f * sample.beta_f))