def _ground(self, brother, sister, taxonomy_weight, similarity_weight):
     """Add one weighted clause relating two facts, `brother` and `sister`.

     The first literal list passed to ``self.add`` is empty; the second
     holds the REMARKABLE variable of ``brother`` and the TYPICAL variable
     of ``sister``. Both weights are forwarded unchanged.

     NOTE(review): presumably the second list holds the negated literals of
     the clause -- confirm against ``add``.
     """
     second_literals = [
         Variable(brother, Dimensions.REMARKABLE),
         Variable(sister, Dimensions.TYPICAL),
     ]
     weights = dict(
         taxonomy_weight=taxonomy_weight,
         similarity_weight=similarity_weight,
     )
     self.add([], second_literals, **weights)
 def _ground(self, parent, child, taxonomy_weight, similarity_weight):
     """Add one weighted clause relating a parent fact and a child fact.

     The first literal list holds the PLAUSIBLE variable of ``parent``, the
     second holds the TYPICAL variable of ``child``. Both weights are
     forwarded unchanged to ``self.add``.

     NOTE(review): presumably the second list holds the negated literals of
     the clause -- confirm against ``add``.
     """
     parent_plausible = Variable(parent, Dimensions.PLAUSIBLE)
     child_typical = Variable(child, Dimensions.TYPICAL)
     self.add(
         [parent_plausible],
         [child_typical],
         similarity_weight=similarity_weight,
         taxonomy_weight=taxonomy_weight,
     )
Example #3
0
 def _ground(self, fact):
     """Add one clause over the dimensions of a single fact.

     The first literal list holds the SALIENT variable of ``fact``; the
     second holds its TYPICAL and REMARKABLE variables. The clause is added
     with ``similarity_weight=1``.

     NOTE(review): presumably the second list holds the negated literals of
     the clause -- confirm against ``add``.
     """
     salient = Variable(fact, Dimensions.SALIENT)
     typical = Variable(fact, Dimensions.TYPICAL)
     remarkable = Variable(fact, Dimensions.REMARKABLE)
     self.add([salient], [typical, remarkable], similarity_weight=1)
 def _ground(self, parent, child, taxonomy_weight, similarity_weight):
     """Add one weighted clause over the REMARKABLE variables of two facts.

     The first literal list is empty; the second holds the REMARKABLE
     variable of ``child`` followed by the one of ``parent``. Both weights
     are forwarded unchanged to ``self.add``.

     NOTE(review): presumably the second list holds the negated literals of
     the clause -- confirm against ``add``.
     """
     remarkable_pair = [
         Variable(fact, Dimensions.REMARKABLE) for fact in (child, parent)
     ]
     self.add(
         [],
         remarkable_pair,
         similarity_weight=similarity_weight,
         taxonomy_weight=taxonomy_weight,
     )
Example #5
0
 def build(self, inputs):
     """Fill the tracker from the evidence and assignment held by `inputs`.

     For every dimension and every fact of the knowledge base whose
     variable is present in ``assignment.map``, an entry is created from
     the fact (if missing) and its per-dimension attributes "evidence",
     "assignment" and "confidence" are set. ``build_ranks`` is called once
     everything has been filled.
     """
     detective = inputs.get_detective()
     assignment = inputs.get_assignment()
     for dimension in Dimensions.iter():
         for fact in inputs.get_kb().values():
             variable = Variable(fact.index, dimension)
             if variable in assignment.map:
                 self.setdefault(fact.index, TrackerFact.from_fact(fact))
                 attributes = self[fact.index].attributes[dimension]
                 attributes["evidence"] = detective[fact.index][dimension]
                 # Stored as a boolean: True when the variable is assigned
                 # Assignment.TRUE.
                 attributes["assignment"] = (
                     assignment.map[variable] == Assignment.TRUE)
                 attributes["confidence"] = assignment.confidence[variable]
     self.build_ranks()
Example #6
0
 def ground(self):
     """Ground one clause per dimension for each pair of similar facts.

     For every concept of the grounder, the similarity matrix is restricted
     to the properties of the concept's facts. Each non-zero entry (i, j)
     of that restriction yields, for every dimension, a clause between the
     i-th and j-th facts of the concept, weighted by the matrix entry
     (``similarity_weight``) and a ``taxonomy_weight`` of 1.
     """
     inputs = self.grounder.inputs
     kb = inputs.get_kb()
     similarity = inputs.get_similarity_matrix()
     concepts = self.grounder.concepts
     for concept in concepts:
         # Position in this list == row/column in the restricted matrix.
         fact_indices = list(concepts[concept])
         property_rows = [
             similarity.index[kb[index].property] for index in fact_indices
         ]
         restricted = similarity.matrix[property_rows][:, property_rows]
         for row, column in zip(*restricted.nonzero()):
             weight = restricted[row, column]
             for dimension in Dimensions.iter():
                 self.add([Variable(fact_indices[row], dimension)],
                          [Variable(fact_indices[column], dimension)],
                          similarity_weight=weight,
                          taxonomy_weight=1.)
Example #7
0
 def ground(self):
     """Ground clauses between a concept's facts and its successors' facts.

     For every concept present in the taxonomy and every property ``p`` of
     that concept, the TYPICAL variables of the facts stored under ``p``
     for each taxonomy successor are collected. When at least one exists,
     one clause is added per fact of the concept under ``p``: its TYPICAL
     variable forms the first list, the collected variables the second,
     with ``similarity_weight=1``.
     """
     grounder = self.grounder
     for concept in grounder.concepts:
         if concept not in grounder.taxonomy.nodes:
             continue
         for prop in grounder.properties[concept]:
             negatives = []
             for successor in grounder.taxonomy.successors(concept):
                 shares_property = (
                     successor in grounder.properties
                     and prop in grounder.properties[successor])
                 if shares_property:
                     negatives.extend(
                         Variable(child, Dimensions.TYPICAL)
                         for child in grounder.properties[successor][prop])
             if not negatives:
                 continue
             for parent in grounder.properties[concept][prop]:
                 self.add([Variable(parent, Dimensions.TYPICAL)],
                          negatives,
                          similarity_weight=1)
Example #8
0
 def load(self, path):
     """Read an assignment from a tab-separated file.

     The first line is skipped. Every other line must hold exactly nine
     tab-separated fields: the fact index, four integer truth values and
     four float confidences, both in the order plausible, typical,
     remarkable, salient. Values are stored in ``self.map`` and
     ``self.confidence``, keyed by ``Variable(index, dimension)``.
     """
     with open(path) as file:
         rows = file.readlines()[1:]
     dimensions = (
         Dimensions.PLAUSIBLE,
         Dimensions.TYPICAL,
         Dimensions.REMARKABLE,
         Dimensions.SALIENT,
     )
     for row in rows:
         # Strict unpacking: a row with a different number of fields raises.
         index, p, t, r, s, cp, ct, cr, cs = row.strip().split("\t")
         fact_index = int(index)
         for dimension, truth in zip(dimensions, (p, t, r, s)):
             self.map[Variable(fact_index, dimension)] = int(truth)
         for dimension, score in zip(dimensions, (cp, ct, cr, cs)):
             self.confidence[Variable(fact_index, dimension)] = float(score)
Example #9
0
 def custom_label(u):
     """Return the label of node `u`, followed by up to three properties.

     The facts listed for ``u`` in ``concepts`` are ranked by decreasing
     confidence for ``dimension``; the properties of the three best ones
     are appended to ``u``, one per line. A node without facts is labelled
     by ``u`` alone. ``self``, ``dimension``, ``concepts`` and ``kb`` come
     from the enclosing scope.
     """
     ranked = sorted(
         ((f, self.confidence[Variable(f, dimension)])
          for f in concepts.get(u, [])),
         key=lambda pair: -pair[1],
     )
     if not ranked:
         return u
     best_properties = [kb[fact].property for fact, _ in ranked[:3]]
     return u + "\n" + "\n".join(best_properties)
Example #10
0
 def save(self, path, kb=None):
     """Write the assignment to a UTF-8 tab-separated file.

     One header line is written, then one line per index of
     ``self.group()`` with nine columns: the fact, its four truth values
     and its four confidences (plausible, typical, remarkable, salient).

     When ``kb`` is given, the fact is written as ``repr(kb[index])`` and
     each truth value as the boolean ``value == Assignment.TRUE``;
     otherwise the raw index and raw values are written.
     """
     readable = kb is not None

     def format_fact(index):
         return repr(kb[index]) if readable else index

     def format_truth(value):
         return value == Assignment.TRUE if readable else value

     dimensions = (
         Dimensions.PLAUSIBLE,
         Dimensions.TYPICAL,
         Dimensions.REMARKABLE,
         Dimensions.SALIENT,
     )
     header = (
         "fact\tplausible\ttypical\tremarkable\tsalient\t"
         "confidence_plausible\tconfidence_typical\t"
         "confidence_remarkable\tconfidence_salient\n"
     )
     row_template = "\t".join(["{}"] * 9) + "\n"
     grouped = self.group()
     with codecs.open(path, "w", "utf-8") as file:
         file.write(header)
         for index in grouped:
             truths = grouped[index]
             cells = [format_fact(index)]
             cells.extend(format_truth(truths[d]) for d in dimensions)
             cells.extend(
                 self.confidence[Variable(index, d)] for d in dimensions)
             file.write(row_template.format(*cells))
Example #11
0
 def log_true(self, path, kb, taxonomy):
     """Write, per concept and dimension, the facts assigned ``self.TRUE``.

     The UTF-8 output has one "# <concept>" section for each key of
     ``taxonomy.relation._imap`` and, inside it, one "## <label>"
     sub-section per dimension listing ``kb[index]`` and its confidence
     (tab-separated) for every index of the concept whose grouped value
     for that dimension equals ``self.TRUE``.
     """
     grouped = self.group()
     concepts = taxonomy.relation._imap
     with codecs.open(path, "w", "utf-8") as file:
         for concept in concepts:
             file.write("# " + concept + "\n\n")
             for dimension in Dimensions.iter():
                 file.write("## " + Dimensions.label(dimension) + "\n\n")
                 true_indices = (
                     index for index in concepts[concept]
                     if grouped[index][dimension] == self.TRUE
                 )
                 for index in true_indices:
                     confidence = self.confidence[Variable(index, dimension)]
                     file.write("{}\t{}\n".format(kb[index], confidence))
                 file.write("\n")
             file.write("\n")
Example #12
0
 def _ground(self, fact):
     """Add one clause between the PLAUSIBLE and SALIENT variables of `fact`.

     The PLAUSIBLE variable forms the first literal list, the SALIENT
     variable the second; the clause is added with ``similarity_weight=1``.

     NOTE(review): presumably the second list holds the negated literals of
     the clause -- confirm against ``add``.
     """
     plausible = Variable(fact, Dimensions.PLAUSIBLE)
     salient = Variable(fact, Dimensions.SALIENT)
     self.add([plausible], [salient], similarity_weight=1)
Example #13
0
File: ilp.py Project: ychalier/dice
 def solve(self, variables_path=None, constraints_path=None):
     """Optimize the model and convert its solution into an assignment.

     Arguments:
         variables_path: optional path of a tab-separated dump with one
             line per model variable, holding the attributes listed in
             ``Ilp.vars_attrs``.
         constraints_path: optional path of a tab-separated dump with one
             line per model constraint, holding the attributes listed in
             ``Ilp.cstr_attrs``.

     Returns:
         The ``Assignment`` built from the solution; it is also stored in
         ``self.assignment``.

     Only variables whose name starts with one of the letters "PTRS" are
     assigned. A variable is assigned true when its value is >= 0.5. Its
     confidence is its reduced cost when that is non-zero, otherwise the
     sum of its value, its objective coefficient and
     ``Parameters.EVIDENCE_OFFSET``.
     """

     def dump_attributes(path, attrs, items):
         # Header line, then one line per item. Every value is followed by
         # a tab (trailing tab included), as in the historical format.
         with open(path, "w") as file:
             file.write("\t".join(attrs) + "\n")
             for item in items:
                 for attr in attrs:
                     try:
                         value = item.getAttr(attr)
                     except Exception:
                         # Best effort: an attribute that cannot be
                         # queried is written as an empty cell.
                         value = ""
                     file.write(str(value) + "\t")
                 file.write("\n")

     # Opening in "w" mode empties the log file before the run.
     open(self.gurobi_log_file, "w").close()
     self.model.params.Threads = 12
     self.model.optimize()
     self.assignment = Assignment(self.variables)
     if constraints_path is not None:
         dump_attributes(
             constraints_path, Ilp.cstr_attrs, self.model.getConstrs())
     gurobi_vars = self.model.getVars()
     if variables_path is not None:
         dump_attributes(variables_path, Ilp.vars_attrs, gurobi_vars)
     for gurobi_var in gurobi_vars:
         letter = gurobi_var.varName[0]
         if letter not in "PTRS":
             continue
         # The index is read from the name with its first two characters
         # and its last character stripped.
         index = int(gurobi_var.varName[2:-1])
         if gurobi_var.rc != 0:
             confidence = gurobi_var.rc
         else:
             confidence = (gurobi_var.x + gurobi_var.obj
                           + Parameters.EVIDENCE_OFFSET)
         self.assignment.assign(
             Variable(index, Dimensions.from_letter(letter)),
             gurobi_var.x >= .5,
             confidence,
         )
     return self.assignment
Example #14
0
 def _ground(self, fact_x, fact_y, similarity_weight):
     """Add, for every dimension, one clause between two facts.

     For each dimension the variable of ``fact_x`` forms the first literal
     list and the variable of ``fact_y`` the second. The clause carries the
     given ``similarity_weight`` and a ``taxonomy_weight`` of 1.
     """
     for dimension in Dimensions.iter():
         variable_x = Variable(fact_x, dimension)
         variable_y = Variable(fact_y, dimension)
         self.add(
             [variable_x],
             [variable_y],
             taxonomy_weight=1.,
             similarity_weight=similarity_weight,
         )
Example #15
0
 def _ground(self, fact):
     """Add, for every dimension, a clause holding only the fact's variable.

     The variable is the single element of the first literal list; the
     second list is empty and no weight is passed to ``self.add``.
     """
     variables = (
         Variable(fact, dimension) for dimension in Dimensions.iter()
     )
     for variable in variables:
         self.add([variable], [])
Example #16
0
 def ground(self):
     """Add, per concept, one clause for TYPICAL and one for SALIENT.

     Each clause lists the variables of all the concept's facts for that
     dimension as first literal list, with an empty second list; ``1.`` is
     passed as third positional argument of ``self.add``.
     """
     concepts = self.grounder.concepts
     for concept in concepts:
         for dimension in (Dimensions.TYPICAL, Dimensions.SALIENT):
             concept_variables = [
                 Variable(index, dimension) for index in concepts[concept]
             ]
             self.add(concept_variables, [], 1.)
Example #17
0
def demo(argv):
    """demo
    arguments:  <inputs-folder> <partition-file> <max-facts-per-subjects> <clean-source> <save-folder>

    Export a per-fact table to ``demo.csv`` in the save folder.

    For at most ``<max-facts-per-subjects>`` facts of each subject, the
    table holds the fact itself, its evidence scores, its cue values and
    its assignment confidences. It is joined with the taxonomy neighbours
    and similar properties found in the same line of the partition file.

    Evidence, cue and ``*_percentile`` columns are replaced by their
    normalised rank in [0, 1]. The ``plausible``, ``typical``,
    ``remarkable`` and ``salient`` columns are min-max scaled to [0, 1];
    a constant column is scaled to 0.

    Each partition line holds tab-separated integers: a count followed by
    fact indices. Only the first ``count`` indices of a line get a row.
    """
    inputs_folder, partition_file, maximum_facts_per_subject, clean_source, save_path = argv
    from dice import Inputs
    from dice.misc import Output
    from dice.constants import Dimensions
    from dice.reason import Variable
    from dice.evidence.cues import JointCue
    from dice.evidence.cues import NecessityCue
    from dice.evidence.cues import SufficiencyCue
    from dice.evidence.cues import ImplicationCue
    from dice.evidence.cues import EntailmentCue
    from dice.evidence.cues import ContradictionCue
    from dice.evidence.cues import EntropyCue
    from tqdm import tqdm
    import pandas as pd

    def weighted_neighbors(subject, neighbors, allowed):
        # Format the neighbours of `subject` that belong to `allowed` as
        # "<neighbor>:<weight>" strings.
        return [
            "{neighbor}:{weight}".format(
                neighbor=neighbor,
                weight=taxonomy.weight(subject, neighbor),
            ) for neighbor in allowed.intersection(neighbors)
        ]

    output = Output(save_path)
    inputs = Inputs(inputs_folder)
    print("Loading inputs...")
    kb = inputs.get_kb()
    taxonomy = inputs.get_taxonomy()
    detective = inputs.get_detective()
    assignment = inputs.get_assignment()
    similarity = inputs.get_similarity_matrix()

    cue_columns = (
        ("cue_joint", JointCue),
        ("cue_necessity", NecessityCue),
        ("cue_sufficiency", SufficiencyCue),
        ("cue_implication", ImplicationCue),
        ("cue_entailment", EntailmentCue),
        ("cue_contradiction", ContradictionCue),
        ("cue_entropy", EntropyCue),
    )
    dimension_columns = (
        ("plausible", Dimensions.PLAUSIBLE),
        ("typical", Dimensions.TYPICAL),
        ("remarkable", Dimensions.REMARKABLE),
        ("salient", Dimensions.SALIENT),
    )

    subjects_representation = dict()
    print("Selecting indices...")
    for fact in tqdm(kb.values()):
        subjects_representation.setdefault(fact.subject,
                                           list()).append(fact.index)
    print("Thresholding number of facts per subject...")
    facts_limit = int(maximum_facts_per_subject)
    selected_indices = set()
    for indices in tqdm(subjects_representation.values()):
        # In-place update: rebuilding the set with union() at every
        # iteration is quadratic in the number of selected facts.
        selected_indices.update(indices[:facts_limit])

    print("Gathering facts...")
    data = list()
    for fact in tqdm(kb.values()):
        if fact.index not in selected_indices:
            continue
        evidence = detective[fact.index]
        # Key insertion order defines the column order of the CSV.
        row = {
            "index": fact.index,
            "source": clean_source,
            "subject": fact.subject,
            "property": fact.property,
            "score": fact.score,
            "evidence_plausible": evidence.plausible,
            "evidence_typical": evidence.typical,
            "evidence_remarkable": evidence.remarkable,
            "evidence_salient": evidence.salient,
        }
        for name, cue in cue_columns:
            row[name] = detective.cues[cue][fact.index]
        for name, dimension in dimension_columns:
            row[name] = assignment.confidence.get(
                Variable(fact.index, dimension), 0)
        # The percentile columns start from the raw confidences and are
        # turned into ranks below.
        for name, _ in dimension_columns:
            row[name + "_percentile"] = row[name]
        data.append(row)
    df_facts = pd.DataFrame(data)
    del data
    n = df_facts.shape[0]

    print("Normalizing columns...")
    rank_columns = [
        "plausible_percentile", "typical_percentile",
        "remarkable_percentile", "salient_percentile",
        "evidence_plausible", "evidence_typical", "evidence_remarkable",
        "evidence_salient", "cue_joint", "cue_necessity",
        "cue_sufficiency", "cue_implication",
        "cue_entailment", "cue_contradiction", "cue_entropy"
    ]
    scaled_columns = ["plausible", "typical", "remarkable", "salient"]
    pbar = tqdm(total=len(rank_columns) + len(scaled_columns))
    # With a single row, n - 1 is zero: fall back to 1 so that the only
    # row gets rank 0 instead of raising ZeroDivisionError.
    rank_denominator = max(n - 1, 1)
    for column in rank_columns:
        df_facts = df_facts.sort_values(by=column)
        df_facts[column] = [i / rank_denominator for i in range(n)]
        pbar.update(1)
    for column in scaled_columns:
        low, high = df_facts[column].min(), df_facts[column].max()
        span = high - low
        if span == 0:
            # A constant column cannot be min-max scaled.
            df_facts[column] = 0.0
        else:
            df_facts[column] = (df_facts[column] - low) / span
        pbar.update(1)
    pbar.close()

    print("Gathering partition...")
    data = list()
    with open(partition_file) as file:
        for line in tqdm(file.readlines()):
            if not line.strip():
                continue
            count, *indices = list(map(int, line.strip().split("\t")))
            subjects = {kb[j].subject for j in indices}
            properties_all = list({kb[j].property for j in indices})
            local_indices = [similarity.index[p] for p in properties_all]
            local_matrix = similarity.matrix[local_indices][:, local_indices]
            # Group the non-zero similarities by the similarity index of
            # their row property, once per line instead of once per fact.
            similar_by_property = dict()
            for j, k in zip(*local_matrix.nonzero()):
                similar_by_property.setdefault(
                    local_indices[j], list()
                ).append(properties_all[k] + ":" + str(local_matrix[j, k]))
            for i in range(count):
                fact_index = indices[i]
                if fact_index not in selected_indices:
                    continue
                fact = kb[fact_index]
                parents = list()
                children = list()
                siblings = list()
                if fact.subject in taxonomy.nodes:
                    parents = weighted_neighbors(
                        fact.subject,
                        taxonomy.predecessors(fact.subject),
                        subjects)
                    children = weighted_neighbors(
                        fact.subject,
                        taxonomy.successors(fact.subject),
                        subjects)
                    siblings = weighted_neighbors(
                        fact.subject,
                        taxonomy.siblings(fact.subject),
                        subjects)
                properties = similar_by_property.get(
                    similarity.index[fact.property], list())
                data.append({
                    "index": fact_index,
                    "parents": ";".join(parents),
                    "children": ";".join(children),
                    "siblings": ";".join(siblings),
                    "properties": ";".join(properties),
                })
    df_partition = pd.DataFrame(data)
    # NOTE(review): `index=False` leaves the frame index out of the CSV;
    # confirm that the fact index is still exported after this join.
    df = df_facts.set_index("index").join(df_partition.set_index("index"),
                                          on="index",
                                          how="outer")
    df.to_csv(output.path("demo.csv"), index=False)
Example #18
0
 def ground(self, include_evidence_rule=True):
     """Build the variables and clauses of the reasoning problem.

     One variable is created per (fact, dimension) pair of ``self.kb``.
     For every non-zero entry of the similarity matrix restricted to the
     properties of the knowledge base, each pair of facts (x, y) carrying
     these two properties is handed to:

     * the sub-concept rules, when y belongs to a taxonomy successor of
       the concept of x;
     * the sibling rules, when y belongs to a taxonomy sibling of the
       concept of x;
     * the concept rules, when y belongs to the concept of x.

     The remaining rules, and ``EvidenceRule`` when
     ``include_evidence_rule`` is true, are grounded through their own
     ``ground`` method.

     Returns:
         The pair ``(self.variables, self.clauses)``.
     """
     if self.verbose:
         print("Grounding...")
     for index in self.kb:
         for dimension in Dimensions.iter():
             self.variables.append(Variable(index, dimension))
     concepts_rules = (SimilarityRule(self, self.clauses), )
     subconcept_rules = (
         RulePlausibilityInheritance(self, self.clauses),
         RuleTypicalityInheritance(self, self.clauses),
         RulePlausibilityInference(self, self.clauses),
         RuleRemarkabilityInheritance(self, self.clauses),
         RuleTypicalPreventsRemarkable(self, self.clauses),
         RuleNotPlausibleImpliesRemarkable(self, self.clauses),
     )
     siblings_rules = (
         RuleNotPlausibleImpliesRemarkableSiblings(self, self.clauses),
         RuleRemarkabilitySiblings(self, self.clauses),
         RuleTypicalPreventsRemarkableSiblings(self, self.clauses),
     )
     other_rules = (
         RuleSalientImpliesPlausible,
         RuleTypicalImpliesPlausible,
         RuleTypicalAndRemarkableImplySalient,
         ExistenceRule,
     )
     kb = self.inputs.get_kb()
     similarity = self.inputs.get_similarity_matrix()
     concept_links = dict()
     parent_links = dict()
     siblings_links = dict()
     for x in self.concepts:
         # The links are computed once per concept and then given to every
         # fact of that concept. Filling them through the loop variable of
         # the fact loop after that loop had ended only linked the last
         # fact of each concept.
         concept_facts = set(self.concepts[x])
         child_facts = set()
         sibling_facts = set()
         if x in self.taxonomy.nodes:
             for child in self.taxonomy.successors(x):
                 child_facts.update(self.concepts.get(child, list()))
             for sibling in self.taxonomy.siblings(x):
                 sibling_facts.update(self.concepts.get(sibling, list()))
         for fact_x in self.concepts[x]:
             # The sets are only read below, so facts of the same concept
             # can share them.
             concept_links[fact_x] = concept_facts
             parent_links[fact_x] = child_facts
             siblings_links[fact_x] = sibling_facts
     # Facts grouped by the similarity-matrix index of their property.
     properties = dict()
     for fact in kb.values():
         ip = similarity.index[fact.property]
         properties.setdefault(ip, set()).add(fact.index)
     inds = list(properties.keys())
     submatrix = similarity.matrix[inds][:, inds]
     for i, j in zip(*submatrix.nonzero()):
         similarity_weight = submatrix[i, j]
         candidates = properties[inds[j]]
         for fact_x in properties[inds[i]]:
             for fact_y in candidates.intersection(parent_links[fact_x]):
                 for rule in subconcept_rules:
                     rule._ground(fact_x, fact_y, 1., similarity_weight)
             for fact_y in candidates.intersection(siblings_links[fact_x]):
                 for rule in siblings_rules:
                     rule._ground(fact_x, fact_y, 1., similarity_weight)
             for fact_y in candidates.intersection(concept_links[fact_x]):
                 for rule in concepts_rules:
                     rule._ground(fact_x, fact_y, similarity_weight)
     for rule in other_rules:
         rule(self, self.clauses).ground()
     if include_evidence_rule:
         EvidenceRule(self, self.clauses).ground()
     return self.variables, self.clauses
 def _ground(self, fact):
     """Add one clause between the PLAUSIBLE and TYPICAL variables of `fact`.

     The PLAUSIBLE variable forms the first literal list, the TYPICAL
     variable the second; the clause is added with ``similarity_weight=1``.

     NOTE(review): presumably the second list holds the negated literals of
     the clause -- confirm against ``add``.
     """
     plausible = Variable(fact, Dimensions.PLAUSIBLE)
     typical = Variable(fact, Dimensions.TYPICAL)
     self.add([plausible], [typical], similarity_weight=1)