Ejemplo n.º 1
0
 def step_assigner(self):
     """Run the assigner and store the assignment it produces.

     When the ``log`` parameter is truthy, the solver variables and
     constraints are dumped while processing, and afterwards the assigner
     report, the assignment (TSV and text form) and one SVG per dimension
     are written to paths obtained from ``self.logger``.
     """
     from dice.reason import Assigner
     assigner = Assigner(self, verbose=self.parameters["verbose"])

     # Without logging there is nothing to do besides solving.
     if not self.parameters["log"]:
         self.set_assignment(assigner.process())
         return

     log_path = self.logger.path
     self.set_assignment(assigner.process(
         variables_path=log_path("gurobi_variables.tsv"),
         constraints_path=log_path("gurobi_constraints.tsv"),
     ))

     # Assigner-level report.
     assigner_report = assigner.report(
         log_path("variables_usage.png"),
         log_path("assignment_stats.png"),
         log_path("clauses.tsv"),
         log_path("clauses"),
     )
     assigner_report.save(log_path("report_assigner.txt"))

     # Assignment dumps.
     self.get_assignment().save(log_path("assignment.tsv"), self.get_kb())
     self.get_assignment().log_true(
         log_path("assignment.txt"),
         self.get_kb(),
         self.get_taxonomy(),
     )

     # One drawing per dimension.
     for dimension in Dimensions.iter():
         self.get_assignment().draw(
             self.get_kb(),
             self.get_taxonomy(),
             dimension,
             log_path("top-" + Dimensions.label(dimension, slug=True)
                      + ".svg"),
         )
Ejemplo n.º 2
0
 def plot(self, path):
     """Draw the evidence per dimension and per cue.

     One ``EvidenceDrawer`` is created for every dimension (values taken
     from ``self[index][dimension]``) and one for every cue class in
     ``self.cues`` (values taken from ``self.cues[cls][index]``). Each
     drawer's name is ``path`` suffixed with ``-<label>``. ``top()`` and
     ``distrib()`` are then called on every drawer, dimensions first.
     """
     inputs = self.inputs

     # Dimension-level drawers.
     drawers = [
         EvidenceDrawer(
             path + "-" + Dimensions.label(dimension, slug=True),
             {index: self[index][dimension] for index in inputs.get_kb()},
             Dimensions.label(dimension),
             inputs.get_kb(),
             inputs.get_taxonomy(),
         )
         for dimension in Dimensions.iter()
     ]

     # Cue-level drawers.
     drawers.extend(
         EvidenceDrawer(
             path + "-" + cls.name,
             {index: self.cues[cls][index] for index in inputs.get_kb()},
             cls.name,
             inputs.get_kb(),
             inputs.get_taxonomy(),
         )
         for cls in self.cues
     )

     for drawer in drawers:
         drawer.top()
         drawer.distrib()
Ejemplo n.º 3
0
 def build(self, verbose=True):
     """Gather every cue, wrap the evidence per fact and normalize it.

     Each key of ``self.cues`` is a cue class; it is instantiated, stored
     back under the same key and asked to gather from ``self.inputs``.
     Every knowledge-base index then gets an ``EvidenceWrapper`` over the
     cue instances, and ``self.normalize`` is called once per dimension.

     verbose -- when truthy, progress messages are printed.
     """
     for cue_class in self.cues:
         if verbose:
             print("Gathering", cue_class.__name__)
         cue = cue_class()
         self.cues[cue_class] = cue
         # NOTE(review): ``self.cues[JointCue]`` is whatever is stored at
         # that key at this point in the iteration - confirm that the
         # ordering of ``self.cues`` guarantees it is already an instance.
         cue.gather(
             self.inputs,
             verbose=verbose,
             joint_cue=self.cues[JointCue],
         )

     for index in self.inputs.get_kb():
         self[index] = EvidenceWrapper(index, self.cues.values())

     for dimension in Dimensions.iter():
         if verbose:
             print("Normalizing", Dimensions.label(dimension), "evidence")
         self.normalize(dimension, verbose)
Ejemplo n.º 4
0
class TrackerFact(Fact):
    """Fact carrying evidence, assignment and confidence per dimension.

    ``attributes`` maps every dimension to a dict with the keys
    ``"evidence"``, ``"assignment"`` and ``"confidence"``. ``header``
    extends ``Fact.header`` with one ``<dimension>_<field>`` column per
    such entry, in the same order used by ``__str__`` and ``parse``.
    """

    header = Fact.header + [
        "{dimension}_{field}".format(
            dimension=Dimensions.label(dimension, slug=True), field=field)
        for dimension in Dimensions.iter()
        for field in ["evidence", "assignment", "confidence"]
    ]

    def __init__(self):
        """Create a fact whose per-dimension attributes are all ``None``."""
        Fact.__init__(self)
        self.attributes = {
            dimension: {
                field: None
                for field in ["evidence", "assignment", "confidence"]
            }
            for dimension in Dimensions.iter()
        }

    @staticmethod
    def from_fact(fact):
        """Return a new ``TrackerFact`` copying the base fields of ``fact``.

        Declared as a static method so that it can be called both on the
        class (``TrackerFact.from_fact(fact)``) and on an instance.
        The per-dimension attributes of the result are left at ``None``.
        """
        tracker_fact = TrackerFact()
        tracker_fact.index = fact.index
        tracker_fact.subject = fact.subject
        tracker_fact.property = fact.property
        tracker_fact.modality = fact.modality
        tracker_fact.score = fact.score
        tracker_fact.text = fact.text
        tracker_fact.sense = fact.sense
        tracker_fact.source = fact.source
        return tracker_fact

    def __str__(self):
        """Return the base fact row followed by the tab-separated attributes."""
        return Fact.__str__(self) + "\t" + "\t".join(
            str(self.attributes[dimension][field])
            for dimension in Dimensions.iter()
            for field in ["evidence", "assignment", "confidence"])

    def parse(self, line):
        """Fill this fact from one tab-separated ``line``.

        The base fields are delegated to ``Fact.parse``. The attribute
        values are read from column 8 onwards, three per dimension:
        ``"assignment"`` is parsed as a boolean (``True`` only for the
        exact text ``"True"``), the other two fields as floats.

        Raises ``IndexError`` if the line has too few columns and
        ``ValueError`` if a numeric column cannot be parsed as a float.
        """
        Fact.parse(self, line)
        columns = line.strip().split("\t")
        # Presumably columns 0-7 hold the base fact fields handled by
        # Fact.parse; attribute columns start right after them.
        position = 8
        for dimension in Dimensions.iter():
            for field in ["evidence", "assignment", "confidence"]:
                raw = columns[position]
                position += 1
                # Test the field name explicitly instead of relying on the
                # column offset being a multiple of three.
                if field == "assignment":
                    self.attributes[dimension][field] = raw == "True"
                else:
                    self.attributes[dimension][field] = float(raw)
Ejemplo n.º 5
0
 def log_true(self, path, kb, taxonomy):
     """Write, per concept and dimension, the facts assigned ``TRUE``.

     The file at ``path`` (UTF-8) gets a ``# <concept>`` heading for every
     key of ``taxonomy.relation._imap`` and a ``## <dimension label>``
     sub-heading for every dimension. Under each sub-heading, one line
     ``<kb entry>\\t<confidence>`` is written for every fact index of the
     concept whose grouped value for that dimension equals ``self.TRUE``.
     """
     assigned = self.group()
     facts_by_concept = taxonomy.relation._imap
     with codecs.open(path, "w", "utf-8") as output:
         for concept in facts_by_concept:
             output.write("# " + concept + "\n\n")
             for dimension in Dimensions.iter():
                 output.write("## " + Dimensions.label(dimension) + "\n\n")
                 for index in facts_by_concept[concept]:
                     if assigned[index][dimension] != self.TRUE:
                         continue
                     confidence = self.confidence[Variable(index, dimension)]
                     output.write("{}\t{}\n".format(kb[index], confidence))
                 # Blank line closing the dimension section.
                 output.write("\n")
             # Blank line closing the concept section.
             output.write("\n")
Ejemplo n.º 6
0
 def group(self):
     """Return the assignment regrouped as ``{index: {dimension: value}}``.

     Every index appearing in ``self.map`` gets an entry for all
     dimensions; dimensions without a value in ``self.map`` are set to
     ``self.UNKNOWN``.
     """
     grouped = {}
     for variable, value in self.map.items():
         row = grouped.get(variable.index)
         if row is None:
             # First time this fact is seen: start from all-unknown.
             row = {d: self.UNKNOWN for d in Dimensions.iter()}
             grouped[variable.index] = row
         row[variable.dimension] = value
     return grouped
Ejemplo n.º 7
0
 def process(self, path, amount):
     """Sample up to ``amount`` same-subject fact pairs and write them as TSV.

     Candidate pairs are all ``(i, j)`` with ``i > j`` among the fact
     indices sharing a subject. Each pair is weighted by the sum over
     dimensions of the absolute confidence difference, divided by the
     number of facts of the subject raised to the power 1.5. Pairs are
     drawn without replacement according to these weights.

     A seed is drawn, printed and used to re-seed the generator, so that
     the sampling can be reproduced from the printed value.

     path   -- output file; receives ``Pair.HEADER`` then one row per pair.
     amount -- maximum number of pairs to select.
     """
     seed = rd.randint(1, 2**31 - 1)
     subjects = dict()
     for fact in self.tracker.values():
         subjects.setdefault(fact.subject, list()).append(fact.index)
     rd.seed(seed)
     print("Random seed:", seed)
     pairs = [(i, j) for s in subjects for i in subjects[s]
              for j in subjects[s] if i > j]
     weights = [
         sum([
             abs(self.tracker[i].attributes[d]["confidence"] -
                 self.tracker[j].attributes[d]["confidence"]) /
             len(subjects[self.tracker[i].subject])**1.5
             for d in Dimensions.iter()
         ]) for i, j in pairs
     ]
     selection = list()
     while len(selection) < amount and pairs:
         if any(weight > 0 for weight in weights):
             # Draw the position directly: same random stream as drawing
             # the pair, without the linear ``pairs.index`` lookup.
             position = rd.choices(
                 range(len(pairs)), weights=weights, k=1)[0]
         else:
             # ``random.choices`` rejects an all-zero weight vector (all
             # remaining pairs have identical confidences); fall back to
             # a uniform draw instead of crashing.
             position = rd.randrange(len(pairs))
         selection.append(Pair(self.tracker, *pairs.pop(position)))
         weights.pop(position)
     with open(path, "w") as output:
         output.write("\t".join(Pair.HEADER) + "\n")
         for pair in selection:
             # ``Pair.tsv`` output is written as is, no newline is added.
             output.write(pair.tsv())
Ejemplo n.º 8
0
 def score(self, dimension, log=False):
     """Accumulate the loss of the predictions for one dimension.

     Annotation rows are skipped when their value for ``dimension`` is
     closer to 3 than ``self.CONFIDENCE`` (presumably the neutral answer
     of the annotation scale - confirm), or when either fact of the pair
     is missing from ``self.tracker``.

     dimension -- column of ``self.annotation`` to score; also passed to
                  ``self.predict`` and ``self.gold``.
     log       -- when truthy, print one tab-separated line for every
                  pair whose loss is not zero.

     Returns ``(loss, count)``: the summed loss and the number of pairs
     that were scored.
     """
     loss, count = 0., 0
     for _, row in self.annotation.iterrows():
         if abs(3 - row[dimension]) < self.CONFIDENCE:
             continue
         # Build each key once so that the membership test and the lookup
         # agree; the index is always normalized to ``int``.
         key_1 = (row["source_1"], int(row["index_1"]))
         if key_1 not in self.tracker:
             continue
         key_2 = (row["source_2"], int(row["index_2"]))
         if key_2 not in self.tracker:
             continue
         fact_1 = self.tracker[key_1]
         fact_2 = self.tracker[key_2]
         count += 1
         predicted = self.predict(fact_1, fact_2, dimension)
         gold = self.gold(row, dimension)
         this_loss = self.loss(predicted, gold)
         if log and this_loss != 0:
             print("\t".join([
                 str(predicted),
                 str(gold),
                 str(this_loss),
                 Dimensions.label(dimension),
                 repr(fact_1),
                 repr(fact_2),
             ]))
         loss += this_loss
     return loss, count
Ejemplo n.º 9
0
 def report(self, filename):
     """Plot the true/false counts per dimension and return a ``Report``.

     The values of ``self.map`` are tallied per dimension, drawn as a
     grouped bar chart saved to ``filename`` in PNG format, and the ratio
     of ``Assignment.TRUE`` values per dimension is added to the returned
     report.
     """
     dimensions = list(Dimensions.iter())

     # Tally the assigned values.
     tally = {}
     for dimension in dimensions:
         tally[dimension] = {Assignment.TRUE: 0, Assignment.FALSE: 0}
     for variable, value in self.map.items():
         tally[variable.dimension][value] += 1
     true_counts = [tally[d][Assignment.TRUE] for d in dimensions]
     false_counts = [tally[d][Assignment.FALSE] for d in dimensions]

     # Grouped bar chart: one slot per dimension, two bars per slot.
     bar_width = .4
     slots = range(len(true_counts))

     def shifted(offset):
         return [slot + offset * bar_width for slot in slots]

     plt.figure(figsize=(8, 8))
     plt.bar(shifted(0), true_counts, bar_width, label="True")
     plt.bar(shifted(1), false_counts, bar_width, label="False")
     # Ticks sit halfway between the two bars of a slot.
     plt.xticks(shifted(.5), [Dimensions.label(d) for d in dimensions])
     plt.ylabel("Number of facts")
     plt.legend(loc="best")
     plt.savefig(filename, format="png")
     plt.close()

     # Textual summary.
     summary = Report()
     for dimension in dimensions:
         n_true = tally[dimension][Assignment.TRUE]
         n_false = tally[dimension][Assignment.FALSE]
         summary.add_value_ratio(
             Dimensions.label(dimension),
             n_true,
             n_true + n_false,
         )
     return summary
Ejemplo n.º 10
0
 def __init__(self):
     """Initialize the base fact and set every attribute to ``None``.

     ``self.attributes`` maps each dimension to a dict holding the keys
     ``"evidence"``, ``"assignment"`` and ``"confidence"``.
     """
     Fact.__init__(self)
     fields = ("evidence", "assignment", "confidence")
     attributes = {}
     for dimension in Dimensions.iter():
         # ``dict.fromkeys`` gives every field the value ``None``.
         attributes[dimension] = dict.fromkeys(fields)
     self.attributes = attributes
Ejemplo n.º 11
0
 def build_ranks(self):
     """Rank the facts by decreasing evidence and confidence.

     For every dimension, ``self.evidence_rank[dimension]`` and
     ``self.confidence_rank[dimension]`` map each key of ``self`` to its
     1-based position when sorted by decreasing value of the matching
     attribute.
     """
     for dimension in Dimensions.iter():

         def descending(field):
             # Negating the value sorts from highest to lowest.
             return sorted(
                 self.keys(),
                 key=lambda index: -self[index].attributes[dimension][field],
             )

         by_evidence = descending("evidence")
         by_confidence = descending("confidence")
         # Both lists hold the same keys, so they have the same length.
         ranked = zip(by_evidence, by_confidence)
         for rank, (evidence_index, confidence_index) in enumerate(ranked, 1):
             self.evidence_rank[dimension][evidence_index] = rank
             self.confidence_rank[dimension][confidence_index] = rank
Ejemplo n.º 12
0
 def parse(self, line):
     """Fill this fact from one tab-separated ``line``.

     The base fields are delegated to ``Fact.parse``. Attribute values
     are read from column 8 onwards, three per dimension: ``"assignment"``
     is ``True`` only for the exact text ``"True"``, while ``"evidence"``
     and ``"confidence"`` are parsed as floats.
     """
     Fact.parse(self, line)
     columns = line.strip().split("\t")
     position = 8
     for dimension in Dimensions.iter():
         for field in ("evidence", "assignment", "confidence"):
             raw = columns[position]
             position += 1
             # The middle field of each triple is the boolean one.
             if field == "assignment":
                 parsed = raw == "True"
             else:
                 parsed = float(raw)
             self.attributes[dimension][field] = parsed
Ejemplo n.º 13
0
 def build(self, inputs):
     """Populate the tracker from the detective and the assignment.

     For every dimension and every fact of the knowledge base that has a
     variable in ``assignment.map``, a ``TrackerFact`` is created if
     needed and its evidence, assignment (as a boolean) and confidence
     are filled in. ``build_ranks`` is called once everything is loaded.
     """
     detective = inputs.get_detective()
     assignment = inputs.get_assignment()
     for dimension in Dimensions.iter():
         for fact in inputs.get_kb().values():
             variable = Variable(fact.index, dimension)
             if variable not in assignment.map:
                 continue
             self.setdefault(fact.index, TrackerFact.from_fact(fact))
             evidence = detective[fact.index][dimension]
             is_true = assignment.map[variable] == Assignment.TRUE
             confidence = assignment.confidence[variable]
             attributes = self[fact.index].attributes[dimension]
             attributes["evidence"] = evidence
             attributes["assignment"] = is_true
             attributes["confidence"] = confidence
     self.build_ranks()
Ejemplo n.º 14
0
 def ground(self):
     """Add one clause per dimension for every similar pair of facts.

     For each concept of the grounder, the similarity matrix is
     restricted to the properties of the concept's facts; every non-zero
     entry ``(i, j)`` yields, for each dimension, a call to ``self.add``
     with the variables of the two facts, the similarity value as
     ``similarity_weight`` and a ``taxonomy_weight`` of 1.
     """
     kb = self.grounder.inputs.get_kb()
     similarity = self.grounder.inputs.get_similarity_matrix()
     for concept in self.grounder.concepts:
         # Position k in both lists refers to the same fact.
         fact_indices = list(self.grounder.concepts[concept])
         property_rows = [
             similarity.index[kb[index].property] for index in fact_indices
         ]
         submatrix = similarity.matrix[property_rows][:, property_rows]
         for row, column in zip(*submatrix.nonzero()):
             fact_a = fact_indices[row]
             fact_b = fact_indices[column]
             for dimension in Dimensions.iter():
                 self.add(
                     [Variable(fact_a, dimension)],
                     [Variable(fact_b, dimension)],
                     similarity_weight=submatrix[row, column],
                     taxonomy_weight=1.,
                 )
Ejemplo n.º 15
0
def pair_evaluator(argv):
    """pair_evaluator
    arguments:  <annotation-file> <feature> <confidence> <tracker-file>+
    """
    from dice.evaluation import Tracker, PairEvaluator
    from dice.constants import Dimensions

    # The first three arguments are positional, the rest are tracker files.
    annotation_file, feature, confidence, *tracker_files = argv

    # The evaluator is configured through class attributes.
    PairEvaluator.FEATURE = feature
    PairEvaluator.CONFIDENCE = float(confidence)
    trackers = [Tracker(tracker_file) for tracker_file in tracker_files]
    evaluator = PairEvaluator(annotation_file, *trackers)

    # Table header, then one row per dimension; key -1 is the overall row.
    print(" " * 8 + "\t ppref\t size")
    detailed = evaluator.evaluate(details=True)
    for dimension, results in detailed.items():
        ppref = round(1 - results["mae"], 2)
        size = results["n"]
        if dimension == -1:
            print("Overall \t", ppref, "\t", size)
            continue
        print(Dimensions.label(dimension), "\t", ppref, "\t", size)
Ejemplo n.º 16
0
 def evaluate(self, details=False, log=False):
     """Score every dimension and aggregate the results.

     details -- when truthy, return a dict mapping each dimension (and
                ``-1`` for the aggregate) to ``{"mae": ..., "n": ...}``;
                otherwise return only the aggregated mean loss.
     log     -- forwarded to ``self.score``.

     A dimension for which ``self.score`` reports a count of 0 is
     recorded with a count of 1, which also avoids dividing by zero.
     """
     results = {}
     total_loss = 0.
     total_count = 0
     for dimension in Dimensions.iter():
         loss, count = self.score(dimension, log)
         count = count or 1
         results[dimension] = {"mae": loss / count, "n": count}
         total_loss += loss
         total_count += count
     overall = total_loss / total_count
     results[-1] = {"mae": overall, "n": total_count}
     return results if details else overall
Ejemplo n.º 17
0
 def __init__(self, path=None):
     """Create empty rank tables, then initialize the knowledge base.

     ``evidence_rank`` and ``confidence_rank`` each map every dimension to
     an empty dict. They are created before ``KnowledgeBase.__init__`` is
     called with ``path``.
     """
     self.evidence_rank = {}
     for dimension in Dimensions.iter():
         self.evidence_rank[dimension] = {}
     self.confidence_rank = {}
     for dimension in Dimensions.iter():
         self.confidence_rank[dimension] = {}
     KnowledgeBase.__init__(self, path)
Ejemplo n.º 18
0
 def _ground(self, fact):
     """Add, for every dimension, a clause over the variable of ``fact``.

     ``self.add`` receives a one-element list holding the variable as its
     first argument and an empty list as its second.
     """
     for dimension in Dimensions.iter():
         variable = Variable(fact, dimension)
         self.add([variable], [])
Ejemplo n.º 19
0
 def __str__(self):
     """Return the base fact row followed by the tab-separated attributes.

     Attributes are emitted dimension by dimension, in the order
     evidence, assignment, confidence.
     """
     prefix = Fact.__str__(self)
     cells = []
     for dimension in Dimensions.iter():
         for field in ("evidence", "assignment", "confidence"):
             cells.append(str(self.attributes[dimension][field]))
     return prefix + "\t" + "\t".join(cells)
Ejemplo n.º 20
0
 def _ground(self, fact_x, fact_y, similarity_weight):
     """Add, for every dimension, a clause linking the two facts.

     ``self.add`` receives the variable of ``fact_x`` and the variable of
     ``fact_y`` (each wrapped in a list), the given ``similarity_weight``
     and a ``taxonomy_weight`` of 1.
     """
     for dimension in Dimensions.iter():
         variable_x = Variable(fact_x, dimension)
         variable_y = Variable(fact_y, dimension)
         self.add(
             [variable_x],
             [variable_y],
             similarity_weight=similarity_weight,
             taxonomy_weight=1.,
         )
Ejemplo n.º 21
0
    def generate(self, script, remote_subject_filter=None):
        """Fill a script template with facts sampled from the tracker.

        The script may contain the following markers:

        * ``{{<n>_<dimension>_<up|down>}}`` declares fact ``<n>``, sampled
          from the top (``up``) or bottom (``down``) tenth of the facts
          ranked by ``<dimension>`` (a dimension label, ``score`` or
          ``random``);
        * ``{{bind_<a>_<b>}}`` forces facts ``<a>`` and ``<b>`` to share
          the same subject;
        * ``{{be_<word>}}`` gives the replacement for a leading ``be `` in
          properties (only the first such marker is used);
        * ``{{s_<n>}}`` / ``{{p_<n>}}`` are replaced by the subject /
          property of fact ``<n>``.

        script                -- the template text.
        remote_subject_filter -- when not ``None``, every sampled fact must
                                 have this subject (overrides bindings).

        Returns a dict with the raw ``text``, the grammar-corrected
        ``html`` (newlines turned into ``<br>``), the sampled facts
        (``samples``, keyed by declaration number as a string) and
        ``self.seed``.

        NOTE: sampling is retried from scratch until every declaration
        finds a candidate; if a subject filter can never be satisfied,
        this loop does not terminate.
        """
        # Raw strings: ``\d`` and ``\w`` are not valid escapes in plain
        # string literals.
        declaration_regex = re.compile(
            r"{{(\d+)_(salient|typical|plausible|remarkable|random|score)_(up|down)}}"
        )
        placeholder_regex = re.compile(r"{{(s|p)_(\d+)}}")
        bindings_regex = re.compile(r"{{bind_(\d+)_(\d+)}}")
        bes_regex = re.compile(r"{{be_(\w+)}}")
        bindings = bindings_regex.findall(script)
        be_match = bes_regex.search(script)
        be_rep = be_match.group(1) if be_match else "be"
        declarations = declaration_regex.findall(script)

        while True:

            facts = dict()
            selected = set()

            retry = False

            for index, dimension, direction in declarations:

                # Checking if the fact is already bound to another one
                subject_filter = None
                for index_a, index_b in bindings:
                    if index == index_a and index_b in facts:
                        subject_filter = facts[index_b].subject
                    elif index == index_b and index_a in facts:
                        subject_filter = facts[index_a].subject
                if remote_subject_filter is not None:
                    subject_filter = remote_subject_filter

                # Getting the list of new facts (single-word subjects only)
                source = [
                    fact for fact in self.tracker.values()
                    if fact.index not in selected and " " not in fact.subject
                ]

                # Applying filter if necessary
                if subject_filter is not None:
                    source = [
                        fact for fact in source
                        if fact.subject == subject_filter
                    ]

                # Re-do the sampling if the current one would not have
                # enough facts
                if not source:
                    retry = True
                    break

                # Sorting facts according to the correct dimension
                if dimension == "random":
                    random.shuffle(source)
                    ranked = source[:]
                elif dimension == "score":
                    ranked = sorted(source, key=lambda fact: fact.score)
                else:
                    # Resolve the label once instead of once per fact.
                    key_dimension = Dimensions.from_label(dimension)
                    ranked = sorted(
                        source,
                        key=lambda fact: fact.attributes[key_dimension][
                            "confidence"])

                # Narrowing down selection space to the top or bottom
                # tenth, keeping at least one candidate
                candidates = []
                if direction == "up":
                    candidates = ranked[int(.9 * len(ranked)):]
                elif direction == "down":
                    candidates = ranked[:int(.1 * len(ranked))]
                if not candidates:
                    if direction == "up":
                        candidates = ranked[-1:]
                    elif direction == "down":
                        candidates = ranked[:1]

                # Choosing and selecting fact
                fact = random.choice(candidates)
                selected.add(fact.index)
                facts[index] = fact

            # Reaching this point without a retry ensures that all facts
            # have been correctly sampled.
            if not retry:
                break

        # Strip the control markers, then fill in the placeholders.
        out = declaration_regex.sub("", script)
        out = bindings_regex.sub("", out)
        out = bes_regex.sub("", out)

        def replacer(match):
            field, index = match.group(1), match.group(2)
            if field == "s":
                return facts[index].subject
            return facts[index].property.replace("be ", be_rep + " ")

        text = placeholder_regex.sub(replacer, out).strip()

        # Grammar-check each line separately.
        corrected_lines = []
        for sentence in text.split("\n"):
            matches = self.tool.check(sentence)
            corrected_sentence = language_check.correct(sentence, matches)
            corrected_lines.append(corrected_sentence.strip() + "\n")
        corrected = "".join(corrected_lines)
        return {
            "text": text,
            "html": corrected.replace("\n", "<br>"),
            "samples": facts,
            "seed": self.seed
        }
Ejemplo n.º 22
0
Archivo: ilp.py Proyecto: ychalier/dice
 def solve(self, variables_path=None, constraints_path=None):
     """Optimize the model and turn the solution into an ``Assignment``.

     variables_path   -- when not ``None``, every model variable is dumped
                         to this TSV file (columns ``Ilp.vars_attrs``).
     constraints_path -- when not ``None``, every model constraint is
                         dumped to this TSV file (columns
                         ``Ilp.cstr_attrs``).

     Only the variables whose name starts with ``P``, ``T``, ``R`` or
     ``S`` are assigned: the value is true when the solution value is at
     least .5, and the confidence is the variable's ``rc`` attribute when
     it is not zero, otherwise ``x + obj + Parameters.EVIDENCE_OFFSET``.

     Returns the assignment, which is also stored in ``self.assignment``.
     """

     def dump(path, attrs, items):
         # Write one row per item; an attribute that cannot be read is
         # left empty rather than aborting the dump. Every cell,
         # including the last one, is followed by a tab.
         with open(path, "w") as handle:
             handle.write("\t".join(attrs) + "\n")
             for item in items:
                 for attr in attrs:
                     try:
                         value = item.getAttr(attr)
                     except Exception:
                         value = ""
                     handle.write(str(value) + "\t")
                 handle.write("\n")

     # Truncate the solver log file before the run.
     with open(self.gurobi_log_file, "w"):
         pass
     self.model.params.Threads = 12
     self.model.optimize()
     self.assignment = Assignment(self.variables)

     if constraints_path is not None:
         dump(constraints_path, Ilp.cstr_attrs, self.model.getConstrs())
     gurobi_vars = self.model.getVars()
     if variables_path is not None:
         dump(variables_path, Ilp.vars_attrs, gurobi_vars)

     for gurobi_var in gurobi_vars:
         letter = gurobi_var.varName[0]
         if letter not in "PTRS":
             continue
         # Names are expected to look like ``<letter><c><index><c>``: the
         # index sits between the third and the last character.
         index = int(gurobi_var.varName[2:-1])
         reduced_cost = gurobi_var.rc
         if reduced_cost != 0:
             confidence = reduced_cost
         else:
             confidence = (gurobi_var.x + gurobi_var.obj
                           + Parameters.EVIDENCE_OFFSET)
         self.assignment.assign(
             Variable(index, Dimensions.from_letter(letter)),
             gurobi_var.x >= .5,
             confidence,
         )
     return self.assignment
Ejemplo n.º 23
0
 def ground(self, include_evidence_rule=True):
     """Create the variables and ground every rule into ``self.clauses``.

     One variable is created per knowledge-base index and dimension.
     Pairwise rules are then grounded for every pair of facts whose
     properties have a non-zero similarity and that are related through
     the taxonomy: facts of a child concept (sub-concept rules), facts of
     a sibling concept (siblings rules) or facts of the same concept
     (similarity rule). The remaining rules ground themselves.

     include_evidence_rule -- when truthy, ``EvidenceRule`` is grounded
                              as well.

     Returns ``(self.variables, self.clauses)``.
     """
     if self.verbose:
         print("Grounding...")
     for index in self.kb:
         for dimension in Dimensions.iter():
             self.variables.append(Variable(index, dimension))
     concepts_rules = (SimilarityRule(self, self.clauses), )
     subconcept_rules = (
         RulePlausibilityInheritance(self, self.clauses),
         RuleTypicalityInheritance(self, self.clauses),
         RulePlausibilityInference(self, self.clauses),
         RuleRemarkabilityInheritance(self, self.clauses),
         RuleTypicalPreventsRemarkable(self, self.clauses),
         RuleNotPlausibleImpliesRemarkable(self, self.clauses),
     )
     siblings_rules = (
         RuleNotPlausibleImpliesRemarkableSiblings(self, self.clauses),
         RuleRemarkabilitySiblings(self, self.clauses),
         RuleTypicalPreventsRemarkableSiblings(self, self.clauses),
     )
     other_rules = (
         RuleSalientImpliesPlausible,
         RuleTypicalImpliesPlausible,
         RuleTypicalAndRemarkableImplySalient,
         ExistenceRule,
     )
     kb = self.inputs.get_kb()
     similarity = self.inputs.get_similarity_matrix()

     # For every fact: the facts of its own concept, of the child
     # concepts and of the sibling concepts.
     concept_links = dict()
     parent_links = dict()
     siblings_links = dict()
     for x in self.concepts:
         own_facts = set(self.concepts[x])
         child_facts = set()
         sibling_facts = set()
         if x in self.taxonomy.nodes:
             for child in self.taxonomy.successors(x):
                 child_facts.update(self.concepts.get(child, ()))
             for sibling in self.taxonomy.siblings(x):
                 sibling_facts.update(self.concepts.get(sibling, ()))
         # Every fact of the concept receives the links, not only the
         # last one iterated. The sets are only read afterwards, so the
         # facts of a concept can share them.
         for fact_x in self.concepts[x]:
             concept_links[fact_x] = own_facts
             parent_links[fact_x] = child_facts
             siblings_links[fact_x] = sibling_facts

     # Group the fact indices by the similarity-matrix row of their
     # property.
     properties = dict()
     for fact in kb.values():
         ip = similarity.index[fact.property]
         properties.setdefault(ip, set())
         properties[ip].add(fact.index)
     inds = list(properties.keys())
     submatrix = similarity.matrix[inds][:, inds]
     for i, j in zip(*submatrix.nonzero()):
         similarity_weight = submatrix[i, j]
         for fact_x in properties[inds[i]]:
             for fact_y in properties[inds[j]].intersection(
                     parent_links[fact_x]):
                 for rule in subconcept_rules:
                     rule._ground(fact_x, fact_y, 1., similarity_weight)
             for fact_y in properties[inds[j]].intersection(
                     siblings_links[fact_x]):
                 for rule in siblings_rules:
                     rule._ground(fact_x, fact_y, 1., similarity_weight)
             for fact_y in properties[inds[j]].intersection(
                     concept_links[fact_x]):
                 for rule in concepts_rules:
                     rule._ground(fact_x, fact_y, similarity_weight)
     for rule in other_rules:
         rule(self, self.clauses).ground()
     if include_evidence_rule:
         EvidenceRule(self, self.clauses).ground()
     return self.variables, self.clauses