import numpy as np

from LOTlib.GrammarInference.Precompute import create_counts


def update_grammar_rules(grammar, res, delete=False, threshold=0.01, exclude=[]):
    """Re-estimate each rule's probability from a weighted set of hypotheses.

    res is a list of (hypothesis, probability) pairs. Rules whose nonterminal
    is in `exclude` are left untouched; rules whose re-estimated probability
    falls below `threshold` are deleted when delete=True.
    """
    hyps = [res[h][0] for h in xrange(len(res))]
    probs = [res[h][1] for h in xrange(len(res))]

    cc = create_counts(grammar, hyps)
    # print cc[1]

    # Collect the distinct nonterminals appearing in the grammar's rule signatures
    sigs = [i.get_rule_signature() for i in grammar]
    dif_sigs = []
    for s in sigs:
        if s[0] not in dif_sigs:
            dif_sigs.append(s[0])

    for g in grammar:
        sig = g.get_rule_signature()
        if sig[0] not in exclude:
            indx = cc[1][sig]  # column of this rule in its nonterminal's count matrix
            g.p = 0
            for h in xrange(len(hyps)):
                rule_count = cc[0][sig[0]][h][indx]
                hyp = hyps[h]
                prob = probs[h]
                # total number of rule uses by hypothesis h, across all nonterminals
                cplx = np.sum([np.sum(cc[0][s][h]) for s in dif_sigs])
                new_prior = (1.0 + rule_count) / (grammar.nrules() + cplx)
                g.p += new_prior * prob

            if delete and g.p < threshold:
                deleteRule(hyp)  # deleteRule is assumed to be defined elsewhere

    grammar.renormalize()
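# --- Usage sketch (not part of the original file). Assumes `hypotheses` is a
# list of already-scored LOTlib hypotheses (each with a .posterior_score), as in
# the scripts below; `res` is then the list of (hypothesis, normalized posterior
# weight) pairs that update_grammar_rules expects.
import numpy as np

scores = np.array([h.posterior_score for h in hypotheses])
probs = np.exp(scores - scores.max())
probs /= probs.sum()                      # normalized posterior weights

res = zip(hypotheses, probs)
update_grammar_rules(grammar, res, delete=False, exclude=['START'])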
print "# Loaded human data"

from Model.Data import concept2data
print "# Loaded concept2data"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.GrammarInference.Precompute import create_counts
from Model.Grammar import grammar

trees = [h.value for h in hypotheses]

nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None)

print "# Computed counts for each hypothesis & nonterminal"
# print counts

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build up the info about the data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

concepts = concept2data.keys()

NYes = []
NNo = []

# Make NYes and NNo
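# --- A possible continuation (illustrative sketch, not the original code).
# Assumes a hypothetical dict `human_responses` mapping each concept to a list
# of (n_yes, n_no) pairs, one per test item, so that NYes/NNo are filled in the
# same concept order used everywhere else.
for c in concepts:
    for n_yes, n_no in human_responses[c]:   # hypothetical data structure
        NYes.append(n_yes)
        NNo.append(n_no)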
from LOTlib.GrammarInference.Precompute import create_counts

which_rules = [r for r in grammar if r.nt not in ['START']]

output_rules_helpers(which_rules, "grammar_inference/header.csv")

#for l1 in lsts:
#for l2 in lsts:
#l1 = lsts[0]
#l2 = lsts[1]
for trntrans in ["train", "trans"]:
    for training in xrange(len(conditioned_on)):

        counts, sig2idx, prior_offset = create_counts(grammar, hypotheses,
                                                      which_rules=which_rules)

        ##############################################################
        #####################GET HUMAN YN COUNTS######################
        #data_dct = get_training_data("data/outR.csv", str(trntrans))
        #data_dct = get_data_by_condition("data/outR.csv",
        #                                 l1, l2, str(trntrans))
        if trntrans == "train":
            data_dct = get_data_conditioned(
                "data/outR.csv",
                #conditioned_on[training],
                "all",
                '0')
        else:
hypotheses = list(set([MyHypothesis(grammar=grammar, maxnodes=100) for _ in xrange(1000)]))  # list so a fixed order is maintained

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This stores, for each hypothesis, a vector of counts of how often each grammar rule is used.
# It is used via a matrix product with the log priors on the GPU to compute the prior
# (the (log) priors are the things we are trying to infer).

from LOTlib.GrammarInference.Precompute import create_counts

# Decide which rules to use
which_rules = [r for r in grammar if r.nt not in ['START']]

counts, sig2idx, prior_offset = create_counts(grammar, hypotheses, which_rules=which_rules)

print "# Computed counts for each hypothesis & nonterminal"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the human data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import make_all_objects

objects = make_all_objects(size=['small', 'medium', 'large'],
                           color=['red', 'green', 'blue'],
                           shape=['square', 'triangle', 'circle'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The data that learners observed
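# --- A CPU sketch of the prior computation described in the count-matrix
# comments earlier in this file (illustrative only; the real computation runs
# on the GPU). For each nonterminal, counts[nt] is a (hypotheses x rules)
# matrix; multiplying it by a vector of log rule probabilities and summing over
# nonterminals gives each hypothesis's log prior, up to prior_offset. Uniform
# probabilities are used here as a placeholder for the values being inferred.
import numpy as np

log_prior = np.array(prior_offset, dtype=float)
for nt in counts:
    C = np.array(counts[nt])                       # hypotheses x rules for this nonterminal
    x = np.log(np.ones(C.shape[1]) / C.shape[1])   # placeholder: uniform rule probabilities
    log_prior = log_prior + C.dot(x)               # one log-prior contribution per hypothesis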
# For now, we'll just sample from the prior
hypotheses = list(set([MyHypothesis(grammar=grammar, maxnodes=100) for _ in xrange(1000)]))  # list so a fixed order is maintained

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This stores, for each hypothesis, a vector of counts of how often each grammar rule is used.
# It is used via a matrix product with the log priors on the GPU to compute the prior
# (the (log) priors are the things we are trying to infer).

from LOTlib.GrammarInference.Precompute import create_counts

trees = [h.value for h in hypotheses]

nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None)

print "# Computed counts for each hypothesis & nonterminal"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the human data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import make_all_objects

objects = make_all_objects(size=['small', 'medium', 'large'],
                           color=['red', 'green', 'blue'],
                           shape=['square', 'triangle', 'circle'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The data that learners observed
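# --- A sketch of the kind of data construction the comment above introduces
# (hypothetical example, not the original code): in LOTlib, observations are
# typically wrapped as FunctionData, with an input drawn from `objects` and a
# boolean label as output. The single data point below is made up for
# illustration.
from LOTlib.DataAndObjects import FunctionData

example_data = [FunctionData(input=[objects[0]], output=True)]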