def canIrecurse(self, data, trueset):
    """Decide whether the lexicon is allowed to rely on recursive calls.

    Each word's observed data is scored against ``trueset`` (precision,
    recall and F1).  The answer is ``False`` as soon as one word has a
    ``recurse_`` rule with a summed ``'SET'`` count of at least 1 while its
    F1 is at or below ``self.alpha * 2 / 3``.

    Args:
        data: iterable of objects exposing ``word``, ``X`` and ``Y``.
        trueset: collection of ``(word, X, Y)`` tuples to compare against.
            It is scanned once per word, so it must be re-iterable.

    Returns:
        bool: ``True`` when recursion is acceptable.  This includes the
        cases where the first hypothesis has no ``grammar`` attribute,
        where there are no words, and where no ``recurse_`` rule exists.
    """
    observed = [(datum.word, datum.X, datum.Y) for datum in data]

    hyps = [self.value[w] for w in self.all_words()]
    try:
        grammar = hyps[0].grammar
    except (IndexError, AttributeError):
        # No hypotheses, or no grammar: the latter is a force function.
        return True

    counts, inx, _ = create_counts(grammar, hyps)
    # (third signature element, column index) for every 'recurse_' rule.
    relinx = [(k[2], inx[k]) for k in inx if k[1] == 'recurse_']
    if not relinx:
        # Nothing can recurse, so there is nothing to veto.
        return True
    counts = np.sum(counts['SET'], axis=0)

    for w in self.all_words():
        wd = [dp for dp in observed if dp[0] == w]  # word data
        pw = [dp for dp in trueset if dp[0] == w]  # proposed word data
        pId = [dp for dp in wd if dp in pw]  # proposed word data observed
        # The 1e-6 terms keep the ratios finite when a list is empty.
        precision = float(len(set(pId))) / float(len(pw) + 1e-6)
        recall = float(len(pId)) / float(len(wd) + 1e-6)
        f1 = (2. * precision * recall) / (precision + recall + 1e-6)

        quoted = q(w)
        i = [ri[1] for ri in relinx if ri[0] == quoted]
        # `i` may hold zero, one or several indices; np.any keeps the test
        # well defined instead of relying on the truth value of an array.
        if np.any(counts[i] >= 1) and f1 <= self.alpha * 2. / 3.:
            return False

    return True
Exemple #2
0
    def canIrecurse(self, data, trueset):
        """Decide whether the lexicon is allowed to rely on recursive calls.

        Each word's observed data is scored against ``trueset`` (precision,
        recall and F1).  The answer is ``False`` as soon as one word has a
        ``recurse_`` rule with a summed ``'SET'`` count of at least 1 while
        its F1 is at or below ``self.alpha * 2 / 3``.

        Args:
            data: iterable of objects exposing ``word``, ``X`` and ``Y``.
            trueset: collection of ``(word, X, Y)`` tuples to compare
                against.  It is scanned once per word, so it must be
                re-iterable.

        Returns:
            bool: ``True`` when recursion is acceptable.  This includes the
            cases where the first hypothesis has no ``grammar`` attribute,
            where there are no words, and where no ``recurse_`` rule exists.
        """
        observed = [(datum.word, datum.X, datum.Y) for datum in data]

        hyps = [self.value[w] for w in self.all_words()]
        try:
            grammar = hyps[0].grammar
        except (IndexError, AttributeError):
            # No hypotheses, or no grammar: the latter is a force function.
            return True

        counts, inx, _ = create_counts(grammar, hyps)
        # (third signature element, column index) for every 'recurse_' rule.
        relinx = [(k[2], inx[k]) for k in inx if k[1] == 'recurse_']
        if not relinx:
            return True
        counts = np.sum(counts['SET'], axis=0)

        for w in self.all_words():
            wd = [dp for dp in observed if dp[0] == w]  # word data
            pw = [dp for dp in trueset if dp[0] == w]  # proposed word data
            pId = [dp for dp in wd if dp in pw]  # proposed word data observed
            # The 1e-6 terms keep the ratios finite when a list is empty.
            precision = float(len(set(pId))) / float(len(pw) + 1e-6)
            recall = float(len(pId)) / float(len(wd) + 1e-6)
            f1 = (2. * precision * recall) / (precision + recall + 1e-6)

            quoted = q(w)
            i = [ri[1] for ri in relinx if ri[0] == quoted]
            # A word may match no recurse_ rule (empty `i`) or several;
            # np.any avoids taking the truth value of such an array.
            if np.any(counts[i] >= 1) and f1 <= self.alpha * 2. / 3.:
                return False

        return True
Exemple #3
0
def compute_reuse_prior(lex):
    """Return the reuse prior of a lexicon.

    The hypotheses of all words in ``lex`` are handed to ``create_counts``.
    For every count matrix in the first element of its result, the matrix
    is summed along axis 0 and scored with ``multdir`` against a uniform
    vector of the same length.  The first element of each ``multdir``
    result is accumulated into the returned total.
    """
    word_hypotheses = [lex.value[word] for word in lex.all_words()]
    count_matrices = create_counts(grammar, word_hypotheses)[0]

    total = 0.0
    for matrix in count_matrices.values():
        rule_usage = np.sum(matrix, axis=0)
        n_rules = len(rule_usage)
        uniform = np.ones(n_rules) / float(n_rules)
        total += multdir([rule_usage], uniform)[0]
    return total
def compute_reuse_prior(lex):
    """Accumulate a ``multdir`` score over the lexicon's rule counts.

    ``create_counts`` is run on the hypotheses of every word in ``lex``.
    Each entry of the first returned mapping is collapsed with a sum over
    axis 0 and compared, through ``multdir``, with a uniform vector of
    matching length.  The ``[0]`` items of those results are added up.
    """
    hyps = [lex.value[w] for w in lex.all_words()]
    per_key_counts = create_counts(grammar, hyps)[0]

    score = 0.0
    for key in per_key_counts:
        summed = np.sum(per_key_counts[key], axis=0)
        size = len(summed)
        flat = np.ones(size) / float(size)
        score += multdir([summed], flat)[0]

    return score
Exemple #5
0
# Progress messages use the parenthesised single-argument form of print,
# which behaves the same on Python 2 and Python 3.
print("# Loaded human data")

from Model.Data import concept2data
print("# Loaded concept2data")

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the rule count matrices
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Inference.GrammarInference.Precompute import create_counts

from Model.Grammar import grammar

# `hypotheses` is not defined in this part of the script; it has to be
# bound before this point.
trees = [h.value for h in hypotheses]

# create_counts returns three values. Judging by the names they are
# per-nonterminal count matrices, a signature-to-index map and a prior
# offset -- NOTE(review): confirm against the LOTlib documentation.
nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None)

print("# Computed counts for each hypothesis & nonterminal")

# print counts

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build up the info about the data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# list(...) keeps `concepts` a real list on Python 3, where keys() is a view.
concepts = list(concept2data.keys())

NYes = []
NNo = []

# Make NYes and NNo
Exemple #6
0
def do_I_reuse(lex):
    """Tell whether any count produced for the lexicon exceeds 1.

    ``create_counts`` is run on the hypotheses of every word in ``lex``.
    The result is ``True`` if at least one of the returned count arrays
    contains an entry greater than 1, otherwise ``False``.
    """
    word_hypotheses = [lex.value[word] for word in lex.all_words()]
    count_matrices = create_counts(grammar, word_hypotheses)[0]

    for matrix in count_matrices.values():
        if np.any(matrix > 1):
            return True
    return False