Example #1
import numpy as np

from LOTlib.GrammarInference.Precompute import create_counts


def update_grammar_rules(grammar, res, delete=False, threshold=0.01, exclude=[]):
    # res is a list of (hypothesis, probability) pairs
    hyps = [h for h, _ in res]
    probs = [p for _, p in res]

    cc = create_counts(grammar, hyps)
    # print cc[1]
    # Collect the distinct nonterminals (first element of each rule signature)
    sigs = [r.get_rule_signature() for r in grammar]
    dif_sigs = []
    for s in sigs:
        if s[0] not in dif_sigs:
            dif_sigs.append(s[0])


    for g in grammar:
        sig = g.get_rule_signature()
        if sig[0] not in exclude:
            indx = cc[1][sig]
            g.p = 0.0
            for h in xrange(len(hyps)):
                # how often hypothesis h used this rule
                rule_count = cc[0][sig[0]][h][indx]
                prob = probs[h]
                # total rule uses by hypothesis h across all nonterminals
                cplx = np.sum([np.sum(cc[0][s][h]) for s in dif_sigs])
                # add-one-smoothed usage frequency, weighted by the hypothesis' probability
                new_prior = (1.0 + rule_count) / (grammar.nrules() + cplx)
                g.p += new_prior * prob
            # optionally drop rules whose re-estimated probability is negligible
            if delete and g.p < threshold:
                deleteRule(g)  # deleteRule is assumed to be defined elsewhere in this module

    grammar.renormalize()
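A minimal usage sketch, assuming `grammar` is an LOTlib grammar and `hypotheses` is a list of LOTlib hypotheses carrying `posterior_score` attributes (none of these names are defined in the example above):

import numpy as np

# Hypothetical setup: pair each hypothesis with a normalized posterior probability.
scores = np.array([h.posterior_score for h in hypotheses])
probs = np.exp(scores - np.logaddexp.reduce(scores))
res = zip(hypotheses, probs)

update_grammar_rules(grammar, res, delete=False, threshold=0.01, exclude=['START'])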
Example #2
print "# Loaded human data"

from Model.Data import concept2data
print "# Loaded concept2data"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.GrammarInference.Precompute import create_counts

from Model.Grammar import grammar

trees = [h.value for h in hypotheses]

nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None)

print "# Computed counts for each hypothesis & nonterminal"

# print counts
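# Rough shape of the return values (an assumption, inferred from how they are
# indexed in Example #1): nt2counts maps each nonterminal name to a matrix whose
# [h][i] entry is how often hypothesis h used the rule whose signature sig2idx
# maps to column i; prior_offset is meant to cover rules left out of the counts.
# For example, with a hypothetical rule r:
#     col = sig2idx[r.get_rule_signature()]
#     uses_by_first_hypothesis = nt2counts[r.nt][0][col]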

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build up the info about the data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

concepts = concept2data.keys()

NYes     = []
NNo      = []

# Make NYes and NNo
from LOTlib.GrammarInference.Precompute import create_counts

which_rules = [r for r in grammar if r.nt not in ['START']]

output_rules_helpers(which_rules, "grammar_inference/header.csv")

#for l1 in lsts:
#for l2 in lsts:
#l1 = lsts[0]
#l2 = lsts[1]

for trntrans in ["train", "trans"]:

    for training in xrange(len(conditioned_on)):
        counts, sig2idx, prior_offset = create_counts(grammar,
                                                      hypotheses,
                                                      which_rules=which_rules)

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get human yes/no counts
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        #data_dct = get_training_data("data/outR.csv", str(trntrans))
        #data_dct = get_data_by_condition("data/outR.csv",
        #l1, l2, str(trntrans))
        if trntrans == "train":
            data_dct = get_data_conditioned(
                "data/outR.csv",
                #conditioned_on[training],
                "all",
                '0')
        else:
Example #4
from Model.Data import concept2data

print "# Loaded concept2data"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.GrammarInference.Precompute import create_counts

from Model.Grammar import grammar

trees = [h.value for h in hypotheses]

nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None)

print "# Computed counts for each hypothesis & nonterminal"

# print counts

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build up the info about the data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

concepts = concept2data.keys()

NYes = []
NNo = []

# Make NYes and NNo
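The example is cut off here; what follows is a hypothetical sketch of the tallying step announced above (the `human_responses` dict, mapping each concept to a list of boolean answers, is an assumption and is not loaded anywhere in this snippet):

for concept in concepts:
    responses = human_responses[concept]            # assumed: list of True/False answers
    NYes.append(sum(1 for r in responses if r))     # number of "yes" answers
    NNo.append(sum(1 for r in responses if not r))  # number of "no" answers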
Example #5
hypotheses = list(set([MyHypothesis(grammar=grammar, maxnodes=100) for _ in xrange(1000)])) # list so order is maintained

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# This stores, for each hypothesis, a vector of counts of how often each rule is used.
# The counts are combined, via a matrix product with the log rule priors on the GPU,
# to compute each hypothesis' prior (those log priors are the quantities we are trying to infer).

from LOTlib.GrammarInference.Precompute import create_counts

# Decide which rules to use
which_rules = [r for r in grammar if r.nt not in ['START']]

counts, sig2idx, prior_offset = create_counts(grammar, hypotheses, which_rules=which_rules)

print "# Computed counts for each hypothesis & nonterminal"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the human data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import make_all_objects

objects = make_all_objects(size=['small', 'medium', 'large'],
                           color=['red', 'green', 'blue'],
                           shape=['square', 'triangle', 'circle'])
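# make_all_objects enumerates the cross product of the feature values above:
# 3 sizes x 3 colors x 3 shapes = 27 distinct objects.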

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The data that learners observed
Example #6
# For now, we'll just sample from the prior
hypotheses = list(set([MyHypothesis(grammar=grammar, maxnodes=100) for _ in xrange(1000)])) # list so order is maintained

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# This stores, for each hypothesis, a vector of counts of how often each rule is used.
# The counts are combined, via a matrix product with the log rule priors on the GPU,
# to compute each hypothesis' prior (those log priors are the quantities we are trying to infer).

from LOTlib.GrammarInference.Precompute import create_counts

trees = [h.value for h in hypotheses]

nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None)

print "# Computed counts for each hypothesis & nonterminal"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the human data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import make_all_objects

objects = make_all_objects(size=['small', 'medium', 'large'],
                           color=['red', 'green', 'blue'],
                           shape=['square', 'triangle', 'circle'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The data that learners observed