Ejemplo n.º 1
0
def break_b_stick(models, sample, params):
    """Grow the models by one value of b after breaking the b stick.

    Increments the module-level ``b_max`` and then, in order:

    * breaks the beta stick stored on ``models.cont`` (using ``sample.gamma``)
      and shares the resulting beta with ``models.start``;
    * adds an output column to ``models.cont`` and ``models.start``;
    * adds one conditional row to ``models.pos``;
    * appends a block of ``g_max`` rows to the ``dist`` and ``pairCounts``
      tables of ``models.cont`` and ``models.fork`` and fills the new ``dist``
      rows with log10 of freshly sampled distributions.

    ``params`` is accepted for signature parity with the other stick-breaking
    functions and is not read here.
    """
    global a_max, b_max, g_max

    b_max += 1

    cont_model = models.cont
    fork_model = models.fork

    ## The b stick is kept on cont; start simply shares the same beta.
    break_beta_stick(cont_model, sample.gamma)
    models.start.beta = cont_model.beta

    ## Both of these models get one more output column.
    for out_model in (cont_model, models.start):
        add_model_column(out_model)

    ## POS is conditioned on b alone, so a single extra row is enough.
    add_model_row_simple(models.pos, sample.alpha_g * sample.beta_g[1:])

    def _pad_block(table, width):
        ## Append g_max all-zero rows of the given width to the table.
        return np.append(table, np.zeros((g_max, width)), axis=0)

    ## Cont and Fork are indexed by (b, g) with g as the inner variable, so
    ## a new b value is just one more block of g_max rows at the end.
    cont_model.dist = _pad_block(cont_model.dist, cont_model.dist.shape[1])
    cont_model.pairCounts = _pad_block(cont_model.pairCounts, cont_model.pairCounts.shape[1])
    fork_model.dist = _pad_block(fork_model.dist, 2)
    fork_model.pairCounts = _pad_block(fork_model.pairCounts, 2)

    ## Fill each row of the new block; pairCounts rows stay at zero.
    block_start = (b_max - 1) * g_max
    for row in range(block_start, block_start + g_max):
        cont_sample = sampler.sampleSimpleDirichlet(cont_model.alpha * cont_model.beta[1:])
        cont_model.dist[row, 0] = -np.inf
        cont_model.dist[row, 1:] = np.log10(cont_sample)

        fork_sample = sampler.sampleSimpleBernoulli(sample.alpha_f * sample.beta_f)
        fork_model.dist[row, :] = np.log10(fork_sample)
Ejemplo n.º 2
0
def add_model_row_simple(model, base):
    """Append one new conditional row to ``model``.

    A zero row is appended to ``model.pairCounts`` and a new row is appended
    to ``model.dist``. Column 0 of the new ``dist`` row is set to ``-inf``;
    the remaining columns receive log10 of a Dirichlet sample whose parameter
    is the new (all-zero) count row plus ``base``.

    Args:
        model: object with 2-D ``dist`` and ``pairCounts`` arrays; both
            attributes are rebound to the enlarged arrays.
        base: Dirichlet base for columns 1 onward of the new row; it must be
            addable to ``model.pairCounts[-1, 1:]``.

    An error is logged (nothing is raised) if ``model.dist`` contains any NaN
    after the row has been added.
    """
    num_outs = model.dist.shape[1]
    model.pairCounts = np.append(model.pairCounts, np.zeros((1, num_outs)), 0)
    model.dist = np.append(model.dist, np.zeros((1, num_outs)), 0)
    model.dist[-1, 0] = -np.inf
    model.dist[-1, 1:] = np.log10(sampler.sampleSimpleDirichlet(model.pairCounts[-1, 1:] + base))
    ## .any() answers the yes/no question directly instead of materialising
    ## the index array of every NaN position.
    if np.isnan(model.dist).any():
        ## This function adds a row, so the message says so.
        logging.error("Addition of row resulted in nan!")
Ejemplo n.º 3
0
def break_a_stick(models, sample, params):
    """Grow the models by one value of a after breaking the a stick.

    Increments the module-level ``a_max`` and then, in order:

    * breaks the beta stick stored on ``models.root`` (using ``sample.gamma``)
      and shares the resulting beta with ``models.act``;
    * adds an output column to ``models.root`` and ``models.act``;
    * adds one conditional row to ``models.act``;
    * appends a zero count row and a log10([0.5, 0.5]) row to
      ``models.reduce``;
    * rebuilds ``models.start`` at ``(a_max * a_max, b_max)``, copying the old
      rows into their new positions and sampling every newly created
      ``dist`` row.

    ``params`` is accepted for signature parity with the other stick-breaking
    functions and is not read here.
    """
    global a_max, b_max, g_max

    a_max += 1

    ## The a stick is stored on root by convention (TODO: move this out to
    ## its own class later); act shares the same beta.
    break_beta_stick(models.root, sample.gamma)
    models.act.beta = models.root.beta

    ## ACT and ROOT each get one more output column.
    add_model_column(models.root)
    add_model_column(models.act)

    ## TODO: add a row to the j distribution.
    ## ACT is conditioned on a_{t-1}, so it also needs one more row.
    add_model_row_simple(models.act, models.act.alpha * models.act.beta[1:])

    ## Boolean model: the simple row add does not apply, so append a zero
    ## count row and a 50/50 distribution row by hand.
    reduce_model = models.reduce
    reduce_model.pairCounts = np.append(reduce_model.pairCounts, np.zeros((1, 2)), 0)
    reduce_model.dist = np.append(reduce_model.dist, np.log10([[0.5, 0.5]]), 0)

    ## Start is conditioned on (a_{t-1}, a_t): re-allocate at the new size
    ## and keep the previous tables around to copy from.
    start_model = models.start
    prev_counts = start_model.pairCounts
    prev_dist = start_model.dist
    grown_shape = (a_max * a_max, b_max)
    start_model.pairCounts = np.zeros(grown_shape)
    start_model.dist = np.zeros(grown_shape)

    def _sample_start_row(row):
        ## Fill one brand-new dist row; its pairCounts row stays at zero.
        start_model.dist[row, 0] = -np.inf
        start_model.dist[row, 1:] = np.log10(
            sampler.sampleSimpleDirichlet(sample.alpha_b * sample.beta_b[1:]))

    ## Each old block of (a_max - 1) rows is copied to the front of its new
    ## a_max-row block; the one trailing row of that block is new.
    ## Because both conditioning variables are 'a', this loop stops one block
    ## short of a_max; the final block is entirely new and is handled below.
    old_block = a_max - 1
    for block in range(old_block):
        dst = block * a_max
        src = block * old_block
        start_model.pairCounts[dst:dst + old_block, :] = prev_counts[src:src + old_block, :]
        start_model.dist[dst:dst + old_block, :] = prev_dist[src:src + old_block, :]
        _sample_start_row(dst + old_block)

    ## The last block has no old rows at all: sample all a_max of its rows.
    last_block = a_max * (a_max - 1)
    for row in range(last_block, last_block + a_max):
        _sample_start_row(row)
Ejemplo n.º 4
0
def break_g_stick(models, sample, params):
    """Grow the models by one value of g after breaking the g stick.

    Increments the module-level ``g_max`` and then, in order:

    * breaks the beta stick stored on ``models.pos`` (using ``sample.gamma``)
      and logs an error if the last beta entry is exactly 0;
    * adds an output column to ``models.pos``;
    * adds one conditional row to ``models.lex`` (base ``params['h'][0, 1:]``)
      and one to ``models.root``;
    * rebuilds the ``dist`` and ``pairCounts`` tables of ``models.cont`` and
      ``models.fork`` at ``b_max * g_max`` rows, copying the old rows into
      their new positions and sampling one new ``dist`` row per b block.

    Args:
        models: container exposing the ``pos``, ``lex``, ``root``, ``cont``
            and ``fork`` models.
        sample: current sample; ``gamma``, ``alpha_f`` and ``beta_f`` are read.
        params: mapping whose ``'h'`` entry supplies the base for the new
            lexical row.
    """
    global a_max, b_max, g_max

    g_max += 1

    ## Resample beta when the stick is broken:
    break_beta_stick(models.pos, sample.gamma)

    if models.pos.beta[-1] == 0.0:
        logging.error("This shouldn't be 0!")

    ## Add a column to the distribution that outputs POS tags:
    add_model_column(models.pos)

    ## Add a row to the lexical distribution for this new POS tag:
    add_model_row_simple(models.lex, params['h'][0,1:])

    ## Add a row to the root model for the new conditional value of g
    add_model_row_simple(models.root, models.root.alpha * models.root.beta[1:])

    ## The slightly trickier case of distributions which depend on g as well as
    ## other variables (in this case, both depend on b) : Need to grab out slices of
    ## distributions and insert into new model with gaps in interior rows

    ## Add rows to the input distributions for all the models dependent on g
    ## at the next time step: (trans [not used yet], cont)
    old_cont = models.cont.pairCounts
    models.cont.pairCounts = np.zeros((b_max*g_max,b_max))
    old_cont_dist = models.cont.dist
    models.cont.dist = np.zeros((b_max*g_max,b_max))

    old_fork = models.fork.pairCounts
    models.fork.pairCounts = np.zeros((b_max*g_max,2))
    old_fork_dist = models.fork.dist
    models.fork.dist = np.zeros((b_max*g_max,2))

    ## Read position in the old tables; cont and fork share the same
    ## (b, g) row layout, so one index serves both.
    old_cont_ind = 0

    for b in range(0, b_max):
        bg = b * g_max
        ## Copy the (g_max - 1) existing rows of this b block, then sample the
        ## single new trailing row. New pairCounts rows stay at zero.
        models.cont.pairCounts[bg:bg+g_max-1,:] = old_cont[old_cont_ind:old_cont_ind+g_max-1,:]
        models.cont.dist[bg:bg+g_max-1,:] = old_cont_dist[old_cont_ind:old_cont_ind+g_max-1,:]
        models.cont.dist[bg+g_max-1,0] = -np.inf
        models.cont.dist[bg+g_max-1,1:] = np.log10(sampler.sampleSimpleDirichlet(models.cont.alpha * models.cont.beta[1:]))

        models.fork.pairCounts[bg:bg+g_max-1,:] = old_fork[old_cont_ind:old_cont_ind+g_max-1,:]
        models.fork.dist[bg:bg+g_max-1,:] = old_fork_dist[old_cont_ind:old_cont_ind+g_max-1,:]
        models.fork.dist[bg+g_max-1,:] = np.log10(sampler.sampleSimpleBernoulli(sample.alpha_f * sample.beta_f))

        old_cont_ind = old_cont_ind + g_max - 1