def _ground(self, brother, sister, taxonomy_weight, similarity_weight):
    # clause: not remarkable(brother) or not typical(sister) -- a property
    # that is typical for a sibling is not remarkable for this concept
    self.add(
        [],
        [Variable(brother, Dimensions.REMARKABLE),
         Variable(sister, Dimensions.TYPICAL)],
        taxonomy_weight=taxonomy_weight,
        similarity_weight=similarity_weight,
    )
def _ground(self, parent, child, taxonomy_weight, similarity_weight):
    # clause: plausible(parent) or not typical(child) -- typicality for a
    # child implies plausibility for the parent
    self.add(
        [Variable(parent, Dimensions.PLAUSIBLE)],
        [Variable(child, Dimensions.TYPICAL)],
        taxonomy_weight=taxonomy_weight,
        similarity_weight=similarity_weight,
    )
def _ground(self, fact): self.add( [Variable(fact, Dimensions.SALIENT)], [ Variable(fact, Dimensions.TYPICAL), Variable(fact, Dimensions.REMARKABLE) ], similarity_weight=1, )
def _ground(self, parent, child, taxonomy_weight, similarity_weight):
    # clause: not remarkable(child) or not remarkable(parent) -- a property
    # remarkable for the parent should not also be remarkable for the child
    self.add(
        [],
        [Variable(child, Dimensions.REMARKABLE),
         Variable(parent, Dimensions.REMARKABLE)],
        taxonomy_weight=taxonomy_weight,
        similarity_weight=similarity_weight,
    )
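# A minimal sketch (not part of the codebase) of how the _ground methods above
# read as soft clauses, assuming add(positives, negatives, ...) encodes
# OR(positives) or OR(negated negatives), i.e. AND(negatives) -> OR(positives).
# The helper and the fact id below are hypothetical.
def clause_as_string(positives, negatives):
    literals = ["{}({})".format(dim, fact) for fact, dim in positives]
    literals += ["not {}({})".format(dim, fact) for fact, dim in negatives]
    return " or ".join(literals)

print(clause_as_string([(42, "plausible")], [(42, "typical")]))
# -> "plausible(42) or not typical(42)", i.e. typical(42) implies plausible(42)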
def build(self, inputs):
    detective = inputs.get_detective()
    assignment = inputs.get_assignment()
    for dimension in Dimensions.iter():
        for fact in inputs.get_kb().values():
            if Variable(fact.index, dimension) not in assignment.map:
                continue
            self.setdefault(fact.index, TrackerFact.from_fact(fact))
            attributes = self[fact.index].attributes[dimension]
            attributes["evidence"] = detective[fact.index][dimension]
            attributes["assignment"] = \
                assignment.map[Variable(fact.index, dimension)] == Assignment.TRUE
            attributes["confidence"] = \
                assignment.confidence[Variable(fact.index, dimension)]
    self.build_ranks()
def ground(self):
    # link every pair of similar properties stated about the same concept:
    # the corresponding facts should agree on each dimension
    kb = self.grounder.inputs.get_kb()
    similarity = self.grounder.inputs.get_similarity_matrix()
    for x in self.grounder.concepts:
        facts_x = dict()
        properties_x = list()
        for i, index in enumerate(self.grounder.concepts[x]):
            properties_x.append(similarity.index[kb[index].property])
            facts_x[i] = index
        submatrix = similarity.matrix[properties_x][:, properties_x]
        for i, j in zip(*submatrix.nonzero()):
            for dimension in Dimensions.iter():
                self.add(
                    [Variable(facts_x[i], dimension)],
                    [Variable(facts_x[j], dimension)],
                    similarity_weight=submatrix[i, j],
                    taxonomy_weight=1.,
                )
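# A self-contained sketch of the submatrix pattern above, assuming
# similarity.matrix is a scipy sparse matrix indexed by property id
# (the toy matrix and index list are hypothetical):
import numpy as np
from scipy.sparse import csr_matrix

matrix = csr_matrix(np.array([
    [0.0, 0.8, 0.0],
    [0.8, 0.0, 0.3],
    [0.0, 0.3, 0.0],
]))
rows = [0, 1]  # property indices attached to one concept
sub = matrix[rows][:, rows]
for i, j in zip(*sub.nonzero()):
    print(rows[i], rows[j], sub[i, j])  # each similar pair, once per direction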
def ground(self):
    # clause per parent fact: typical(parent) or not typical(child_1) or ...
    # -- if a property is typical for all the children that state it, it
    # should be typical for the parent as well
    for x in self.grounder.concepts:
        if x not in self.grounder.taxonomy.nodes:
            continue
        for p in self.grounder.properties[x]:
            negatives = []
            for y in self.grounder.taxonomy.successors(x):
                if y not in self.grounder.properties:
                    continue
                if p not in self.grounder.properties[y]:
                    continue
                for child in self.grounder.properties[y][p]:
                    negatives.append(Variable(child, Dimensions.TYPICAL))
            if len(negatives) == 0:
                continue
            for parent in self.grounder.properties[x][p]:
                self.add(
                    [Variable(parent, Dimensions.TYPICAL)],
                    negatives,
                    similarity_weight=1,
                )
def load(self, path):
    with open(path) as file:
        lines = file.readlines()
    for line in lines[1:]:  # skip the header line
        index, p, t, r, s, cp, ct, cr, cs = line.strip().split("\t")
        index = int(index)
        self.map[Variable(index, Dimensions.PLAUSIBLE)] = int(p)
        self.map[Variable(index, Dimensions.TYPICAL)] = int(t)
        self.map[Variable(index, Dimensions.REMARKABLE)] = int(r)
        self.map[Variable(index, Dimensions.SALIENT)] = int(s)
        self.confidence[Variable(index, Dimensions.PLAUSIBLE)] = float(cp)
        self.confidence[Variable(index, Dimensions.TYPICAL)] = float(ct)
        self.confidence[Variable(index, Dimensions.REMARKABLE)] = float(cr)
        self.confidence[Variable(index, Dimensions.SALIENT)] = float(cs)
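# Hypothetical example of the TSV layout consumed by load() (and produced by
# the matching save() below); the values are made up, and the first line is
# the header that load() skips:
#
#   fact  plausible  typical  remarkable  salient  confidence_plausible  ...
#   42    1          1        0           0        0.91                  ...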
def custom_label(u):
    # nested helper: label node u with its top-3 properties by confidence
    # (kb, concepts, dimension and self are captured from the enclosing scope)
    scores = [
        (f, self.confidence[Variable(f, dimension)])
        for f in concepts.get(u, [])
    ]
    scores.sort(key=lambda x: -x[1])
    if len(scores) > 0:
        return u + "\n" + "\n".join(
            [kb[s[0]].property for s in scores[:3]])
    else:
        return u
def save(self, path, kb=None):
    def format_fact(index):
        if kb is None:
            return index
        return repr(kb[index])

    def format_truth(value):
        if kb is None:
            return value
        return value == Assignment.TRUE

    grouped = self.group()
    with codecs.open(path, "w", "utf-8") as file:
        header = "fact\tplausible\ttypical\tremarkable\tsalient\t"
        header += "confidence_plausible\tconfidence_typical\t"
        header += "confidence_remarkable\tconfidence_salient\n"
        file.write(header)
        for index in grouped:
            file.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                format_fact(index),
                format_truth(grouped[index][Dimensions.PLAUSIBLE]),
                format_truth(grouped[index][Dimensions.TYPICAL]),
                format_truth(grouped[index][Dimensions.REMARKABLE]),
                format_truth(grouped[index][Dimensions.SALIENT]),
                self.confidence[Variable(index, Dimensions.PLAUSIBLE)],
                self.confidence[Variable(index, Dimensions.TYPICAL)],
                self.confidence[Variable(index, Dimensions.REMARKABLE)],
                self.confidence[Variable(index, Dimensions.SALIENT)]))
def log_true(self, path, kb, taxonomy):
    grouped = self.group()
    concepts = taxonomy.relation._imap
    with codecs.open(path, "w", "utf-8") as file:
        for concept in concepts:
            file.write("# " + concept + "\n\n")
            for d in Dimensions.iter():
                file.write("## " + Dimensions.label(d) + "\n\n")
                for index in concepts[concept]:
                    if grouped[index][d] == self.TRUE:
                        file.write("{}\t{}\n".format(
                            kb[index],
                            self.confidence[Variable(index, d)],
                        ))
                file.write("\n")
            file.write("\n")
def _ground(self, fact): self.add( [Variable(fact, Dimensions.PLAUSIBLE)], [Variable(fact, Dimensions.SALIENT)], similarity_weight=1 )
def solve(self, variables_path=None, constraints_path=None):
    open(self.gurobi_log_file, "w").close()  # truncate any previous log
    self.model.params.Threads = 12
    self.model.optimize()
    self.assignment = Assignment(self.variables)
    f_vars, f_cstr = None, None
    if constraints_path is not None:
        f_cstr = open(constraints_path, "w")
        f_cstr.write("\t".join(Ilp.cstr_attrs) + "\n")
    if f_cstr is not None:
        for constraint in self.model.getConstrs():
            for attr in Ilp.cstr_attrs:
                value = ""
                try:
                    value = constraint.getAttr(attr)
                except Exception:
                    pass
                f_cstr.write(str(value) + "\t")
            f_cstr.write("\n")
        f_cstr.close()
    if variables_path is not None:
        f_vars = open(variables_path, "w")
        f_vars.write("\t".join(Ilp.vars_attrs) + "\n")
    # confidence range for basic variables (reduced cost of zero), taken
    # from the sensitivity range of their objective coefficients
    inner_confidence = []
    for gurobi_var in self.model.getVars():
        if gurobi_var.varName[0] not in "PTRS":
            continue
        if gurobi_var.rc == 0:
            up = min(2, gurobi_var.SAObjUp)
            low = max(-2, gurobi_var.SAObjLow)
            inner_confidence.append((gurobi_var.x - .51) * (up - low))
    if len(inner_confidence) == 0:
        a, b = 0, 1
    else:
        a, b = min(inner_confidence), max(inner_confidence)
    for gurobi_var in self.model.getVars():
        if f_vars is not None:
            for attr in Ilp.vars_attrs:
                value = ""
                try:
                    value = gurobi_var.getAttr(attr)
                except Exception:
                    pass
                f_vars.write(str(value) + "\t")
            f_vars.write("\n")
        # variable names encode the dimension letter and the fact index,
        # e.g. "P[42]"
        letter = gurobi_var.varName[0]
        if letter not in "PTRS":
            continue
        index = int(gurobi_var.varName[2:-1])
        if gurobi_var.rc != 0:
            confidence = gurobi_var.rc
        else:
            confidence = gurobi_var.x + gurobi_var.obj + Parameters.EVIDENCE_OFFSET
        self.assignment.assign(
            Variable(index, Dimensions.from_letter(letter)),
            gurobi_var.x >= .5,
            confidence,
        )
    if f_vars is not None:
        f_vars.close()
    return self.assignment
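# A minimal usage sketch; the Ilp constructor arguments shown here are
# hypothetical, assuming the object is built from the grounding step:
ilp = Ilp(variables, clauses)  # hypothetical constructor
assignment = ilp.solve(
    variables_path="variables.tsv",      # optional Gurobi variable dump
    constraints_path="constraints.tsv",  # optional Gurobi constraint dump
)
is_typical = assignment.map[Variable(0, Dimensions.TYPICAL)]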
def _ground(self, fact_x, fact_y, similarity_weight):
    # similar facts should agree on every dimension
    for dimension in Dimensions.iter():
        self.add(
            [Variable(fact_x, dimension)],
            [Variable(fact_y, dimension)],
            similarity_weight=similarity_weight,
            taxonomy_weight=1.,
        )
def _ground(self, fact):
    # one positive unit clause per dimension of the fact
    for dimension in Dimensions.iter():
        self.add(
            [Variable(fact, dimension)],
            [],
        )
def ground(self):
    # per concept: at least one fact should be typical, and at least one
    # should be salient (a clause with only positive literals)
    for x in self.grounder.concepts:
        for dimension in [Dimensions.TYPICAL, Dimensions.SALIENT]:
            self.add(
                [Variable(i, dimension) for i in self.grounder.concepts[x]],
                [],
                1.,
            )
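# Under the same assumed clause semantics as the clause_as_string sketch
# above, a purely positive clause reads as a plain disjunction, e.g. for one
# concept with facts f1..fk:
#
#   typical(f1) or typical(f2) or ... or typical(fk)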
def demo(argv):
    """demo

    arguments: <inputs-folder> <partition-file> <max-facts-per-subject>
               <clean-source> <save-folder>
    """
    inputs_folder, partition_file, maximum_facts_per_subject, \
        clean_source, save_path = argv
    from dice import Inputs
    from dice.misc import Output
    from dice.constants import Dimensions
    from dice.reason import Variable
    from dice.evidence.cues import JointCue
    from dice.evidence.cues import NecessityCue
    from dice.evidence.cues import SufficiencyCue
    from dice.evidence.cues import ImplicationCue
    from dice.evidence.cues import EntailmentCue
    from dice.evidence.cues import ContradictionCue
    from dice.evidence.cues import EntropyCue
    from tqdm import tqdm
    import pandas as pd
    output = Output(save_path)
    inputs = Inputs(inputs_folder)
    print("Loading inputs...")
    kb = inputs.get_kb()
    taxonomy = inputs.get_taxonomy()
    detective = inputs.get_detective()
    assignment = inputs.get_assignment()
    similarity = inputs.get_similarity_matrix()
    data = list()
    selected_indices = set()
    subjects_representation = dict()
    print("Selecting indices...")
    for fact in tqdm(kb.values()):
        subjects_representation.setdefault(fact.subject, list())
        subjects_representation[fact.subject].append(fact.index)
    print("Thresholding number of facts per subject...")
    for subject, indices in tqdm(subjects_representation.items()):
        selected_indices = selected_indices.union(
            indices[:int(maximum_facts_per_subject)])
    print("Gathering facts...")
    for fact in tqdm(kb.values()):
        if fact.index not in selected_indices:
            continue
        data.append({
            "index": fact.index,
            "source": clean_source,
            "subject": fact.subject,
            "property": fact.property,
            "score": fact.score,
            "evidence_plausible": detective[fact.index].plausible,
            "evidence_typical": detective[fact.index].typical,
            "evidence_remarkable": detective[fact.index].remarkable,
            "evidence_salient": detective[fact.index].salient,
            "cue_joint": detective.cues[JointCue][fact.index],
            "cue_necessity": detective.cues[NecessityCue][fact.index],
            "cue_sufficiency": detective.cues[SufficiencyCue][fact.index],
            "cue_implication": detective.cues[ImplicationCue][fact.index],
            "cue_entailment": detective.cues[EntailmentCue][fact.index],
            "cue_contradiction": detective.cues[ContradictionCue][fact.index],
            "cue_entropy": detective.cues[EntropyCue][fact.index],
            "plausible": assignment.confidence.get(
                Variable(fact.index, Dimensions.PLAUSIBLE), 0),
            "typical": assignment.confidence.get(
                Variable(fact.index, Dimensions.TYPICAL), 0),
            "remarkable": assignment.confidence.get(
                Variable(fact.index, Dimensions.REMARKABLE), 0),
            "salient": assignment.confidence.get(
                Variable(fact.index, Dimensions.SALIENT), 0),
            # the *_percentile columns start as raw confidences and are
            # rank-normalized below
            "plausible_percentile": assignment.confidence.get(
                Variable(fact.index, Dimensions.PLAUSIBLE), 0),
            "typical_percentile": assignment.confidence.get(
                Variable(fact.index, Dimensions.TYPICAL), 0),
            "remarkable_percentile": assignment.confidence.get(
                Variable(fact.index, Dimensions.REMARKABLE), 0),
            "salient_percentile": assignment.confidence.get(
                Variable(fact.index, Dimensions.SALIENT), 0),
        })
    df_facts = pd.DataFrame(data)
    del data
    n = df_facts.shape[0]
    print("Normalizing columns...")
    pbar = tqdm(total=19)  # 15 rank-normalized + 4 min-max normalized columns
    for column in [
            "plausible_percentile", "typical_percentile",
            "remarkable_percentile", "salient_percentile",
            "evidence_plausible", "evidence_typical",
            "evidence_remarkable", "evidence_salient",
            "cue_joint", "cue_necessity", "cue_sufficiency",
            "cue_implication", "cue_entailment", "cue_contradiction",
            "cue_entropy"]:
        # rank-normalize to [0, 1]: sort, then assign i / (n - 1) by position
        df_facts = df_facts.sort_values(by=column)
        df_facts[column] = [i / (n - 1) for i in range(n)]
        pbar.update(1)
    for column in ["plausible", "typical", "remarkable", "salient"]:
        # min-max normalize to [0, 1]
        values = list()
        a, b = df_facts[column].min(), df_facts[column].max()
        for index, row in df_facts.iterrows():
            values.append((row[column] - a) / (b - a))
        df_facts[column] = values
        pbar.update(1)
    pbar.close()
    print("Gathering partition...")
    data = list()
    with open(partition_file) as file:
        for line in tqdm(file.readlines()):
            count, *indices = list(map(int, line.strip().split("\t")))
            subjects = set([kb[j].subject for j in indices])
            properties_all = list(set([kb[j].property for j in indices]))
            local_indices = [similarity.index[p] for p in properties_all]
            local_matrix = similarity.matrix[local_indices][:, local_indices]
            for i in range(count):
                if indices[i] not in selected_indices:
                    continue
                fact = kb[indices[i]]
                property_index_self = similarity.index[fact.property]
                parents = list()
                children = list()
                siblings = list()
                if fact.subject in taxonomy.nodes:
                    parents = [
                        "{neighbor}:{weight}".format(
                            neighbor=neighbor,
                            weight=taxonomy.weight(fact.subject, neighbor),
                        )
                        for neighbor in subjects.intersection(
                            taxonomy.predecessors(fact.subject))
                    ]
                    children = [
                        "{neighbor}:{weight}".format(
                            neighbor=neighbor,
                            weight=taxonomy.weight(fact.subject, neighbor),
                        )
                        for neighbor in subjects.intersection(
                            taxonomy.successors(fact.subject))
                    ]
                    siblings = [
                        "{neighbor}:{weight}".format(
                            neighbor=neighbor,
                            weight=taxonomy.weight(fact.subject, neighbor),
                        )
                        for neighbor in subjects.intersection(
                            taxonomy.siblings(fact.subject))
                    ]
                properties = list()
                for j, k in zip(*local_matrix.nonzero()):
                    if local_indices[j] != property_index_self:
                        continue
                    properties.append(
                        properties_all[k] + ":" + str(local_matrix[j, k]))
                data.append({
                    "index": indices[i],
                    "parents": ";".join(parents),
                    "children": ";".join(children),
                    "siblings": ";".join(siblings),
                    "properties": ";".join(properties),
                })
    df_partition = pd.DataFrame(data)
    df = df_facts.set_index("index").join(
        df_partition.set_index("index"), how="outer")
    df.to_csv(output.path("demo.csv"), index=False)
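# Hypothetical invocation of the demo entry point, assuming sys.argv-style
# dispatch of the five positional arguments listed in the docstring:
#
#   demo(["inputs/", "partition.tsv", "20", "quasimodo", "out/"])
#
# This writes out/demo.csv with one row per selected fact, joining the
# normalized scores with the taxonomy neighborhood columns.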
def ground(self, include_evidence_rule=True):
    if self.verbose:
        print("Grounding...")
    # one variable per fact and dimension
    for index in self.kb:
        for dimension in Dimensions.iter():
            self.variables.append(Variable(index, dimension))
    # rules grouped by the kind of fact pair they relate
    concepts_rules = (SimilarityRule(self, self.clauses), )
    subconcept_rules = (
        RulePlausibilityInheritance(self, self.clauses),
        RuleTypicalityInheritance(self, self.clauses),
        RulePlausibilityInference(self, self.clauses),
        RuleRemarkabilityInheritance(self, self.clauses),
        RuleTypicalPreventsRemarkable(self, self.clauses),
        RuleNotPlausibleImpliesRemarkable(self, self.clauses),
    )
    siblings_rules = (
        RuleNotPlausibleImpliesRemarkableSiblings(self, self.clauses),
        RuleRemarkabilitySiblings(self, self.clauses),
        RuleTypicalPreventsRemarkableSiblings(self, self.clauses),
    )
    other_rules = (
        RuleSalientImpliesPlausible,
        RuleTypicalImpliesPlausible,
        RuleTypicalAndRemarkableImplySalient,
        ExistenceRule,
    )
    kb = self.inputs.get_kb()
    similarity = self.inputs.get_similarity_matrix()
    # index, for each fact, the facts sharing its concept, the facts stated
    # on a child concept, and the facts stated on a sibling concept
    concept_links = dict()
    parent_links = dict()
    siblings_links = dict()
    for x in self.concepts:
        for fact_x in self.concepts[x]:
            concept_links[fact_x] = set(self.concepts[x])
            parent_links[fact_x] = set()
            siblings_links[fact_x] = set()
            if x not in self.taxonomy.nodes:
                continue
            for child in self.taxonomy.successors(x):
                for fact_y in self.concepts.get(child, list()):
                    parent_links[fact_x].add(fact_y)
            for sibling in self.taxonomy.siblings(x):
                for fact_y in self.concepts.get(sibling, list()):
                    siblings_links[fact_x].add(fact_y)
    # group facts by property and iterate over pairs of similar properties
    properties = dict()
    for fact in kb.values():
        ip = similarity.index[fact.property]
        properties.setdefault(ip, set())
        properties[ip].add(fact.index)
    inds = list(properties.keys())
    submatrix = similarity.matrix[inds][:, inds]
    for i, j in zip(*submatrix.nonzero()):
        similarity_weight = submatrix[i, j]
        for fact_x in properties[inds[i]]:
            for fact_y in properties[inds[j]].intersection(
                    parent_links[fact_x]):
                for rule in subconcept_rules:
                    rule._ground(fact_x, fact_y, 1., similarity_weight)
            for fact_y in properties[inds[j]].intersection(
                    siblings_links[fact_x]):
                for rule in siblings_rules:
                    rule._ground(fact_x, fact_y, 1., similarity_weight)
            for fact_y in properties[inds[j]].intersection(
                    concept_links[fact_x]):
                for rule in concepts_rules:
                    rule._ground(fact_x, fact_y, similarity_weight)
    for rule in other_rules:
        rule(self, self.clauses).ground()
    if include_evidence_rule:
        EvidenceRule(self, self.clauses).ground()
    return self.variables, self.clauses
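# A minimal usage sketch; the Grounder constructor shown here is hypothetical
# and assumes it is wired to an Inputs instance as in the methods above:
grounder = Grounder(inputs)  # hypothetical constructor
variables, clauses = grounder.ground(include_evidence_rule=True)
print(len(variables), "variables and", len(clauses), "clauses")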
def _ground(self, fact): self.add( [Variable(fact, Dimensions.PLAUSIBLE)], [Variable(fact, Dimensions.TYPICAL)], similarity_weight=1, )