def plot(self, path):
    drawers = list()
    # One drawer per dimension, fed the per-fact evidence scores.
    for dimension in Dimensions.iter():
        drawers.append(
            EvidenceDrawer(
                path + "-" + Dimensions.label(dimension, slug=True),
                {
                    index: self[index][dimension]
                    for index in self.inputs.get_kb()
                },
                Dimensions.label(dimension),
                self.inputs.get_kb(),
                self.inputs.get_taxonomy(),
            ))
    # One drawer per cue class, fed the raw cue scores.
    for cls in self.cues:
        drawers.append(
            EvidenceDrawer(
                path + "-" + cls.name,
                {
                    index: self.cues[cls][index]
                    for index in self.inputs.get_kb()
                },
                cls.name,
                self.inputs.get_kb(),
                self.inputs.get_taxonomy(),
            ))
    for drawer in drawers:
        drawer.top()
        drawer.distrib()
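# A minimal usage sketch for plot(). The receiver name and path below are
# hypothetical; plot() writes one figure set per dimension and one per cue
# class, suffixing the given path with the dimension slug or cue name.
#
#   evidence.plot("output/evidence")
#   # -> files named like "output/evidence-<dimension-slug>..."
#   #    and "output/evidence-<cue-name>..."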
def report(self, filename):
    # Count TRUE/FALSE assignments per dimension.
    counts = {
        d: {Assignment.TRUE: 0, Assignment.FALSE: 0}
        for d in Dimensions.iter()
    }
    for x, val in self.map.items():
        counts[x.dimension][val] += 1
    trues = [counts[d][Assignment.TRUE] for d in Dimensions.iter()]
    falses = [counts[d][Assignment.FALSE] for d in Dimensions.iter()]
    width = .4

    def pos(offset):
        return [i + offset * width for i in range(len(trues))]

    # Grouped bar chart: one True/False pair of bars per dimension.
    plt.figure(figsize=(8, 8))
    plt.bar(pos(0), trues, width, label="True")
    plt.bar(pos(1), falses, width, label="False")
    plt.xticks(pos(.5), [Dimensions.label(d) for d in Dimensions.iter()])
    plt.ylabel("Number of facts")
    plt.legend(loc="best")
    plt.savefig(filename, format="png")
    plt.close()
    report = Report()
    for d in Dimensions.iter():
        report.add_value_ratio(
            Dimensions.label(d),
            counts[d][Assignment.TRUE],
            counts[d][Assignment.TRUE] + counts[d][Assignment.FALSE],
        )
    return report
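# Bar layout sketch for report(): with width = .4, pos(0) puts the "True"
# bars at x = 0, 1, 2, ..., pos(1) puts the "False" bars at x = 0.4, 1.4, ...,
# and pos(.5) centers each tick between the paired bars at x = 0.2, 1.2, ...
# A standalone reproduction with matplotlib only and synthetic counts:
#
#   import matplotlib.pyplot as plt
#   trues, falses, width = [10, 4, 7], [2, 6, 3], .4
#   pos = lambda offset: [i + offset * width for i in range(len(trues))]
#   plt.bar(pos(0), trues, width, label="True")
#   plt.bar(pos(1), falses, width, label="False")
#   plt.xticks(pos(.5), ["dim A", "dim B", "dim C"])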
def step_assigner(self):
    from dice.reason import Assigner
    assigner = Assigner(self, verbose=self.parameters["verbose"])
    if bool(self.parameters["log"]):
        self.set_assignment(
            assigner.process(
                variables_path=self.logger.path("gurobi_variables.tsv"),
                constraints_path=self.logger.path("gurobi_constraints.tsv"),
            ))
    else:
        self.set_assignment(assigner.process())
    if bool(self.parameters["log"]):
        assigner.report(
            self.logger.path("variables_usage.png"),
            self.logger.path("assignment_stats.png"),
            self.logger.path("clauses.tsv"),
            self.logger.path("clauses"),
        ).save(self.logger.path("report_assigner.txt"))
        self.get_assignment().save(self.logger.path("assignment.tsv"),
                                   self.get_kb())
        self.get_assignment().log_true(self.logger.path("assignment.txt"),
                                       self.get_kb(), self.get_taxonomy())
        for d in Dimensions.iter():
            self.get_assignment().draw(
                self.get_kb(),
                self.get_taxonomy(),
                d,
                self.logger.path(
                    "top-" + Dimensions.label(d, slug=True) + ".svg"),
            )
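# step_assigner() only reads "verbose" and "log" from self.parameters; with
# logging enabled it also dumps the solver model and per-dimension summaries.
# A hypothetical configuration sketch (receiver name is an assumption):
#
#   pipeline.parameters = {"verbose": True, "log": True}
#   pipeline.step_assigner()
#   # With log=True, gurobi_variables.tsv / gurobi_constraints.tsv, the
#   # assignment dumps, and one top-<dimension>.svg per dimension are
#   # written under the logger's directory.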
def score(self, dimension, log=False):
    loss, count = 0., 0
    for index, row in self.annotation.iterrows():
        # Skip annotations too close to the neutral midpoint (3).
        if abs(3 - row[dimension]) < self.CONFIDENCE:
            continue
        # Build the tracker keys once, with the same int() coercion for the
        # membership test and the lookup (the lookup previously skipped it).
        key_1 = (row["source_1"], int(row["index_1"]))
        key_2 = (row["source_2"], int(row["index_2"]))
        if key_1 not in self.tracker or key_2 not in self.tracker:
            continue
        fact_1 = self.tracker[key_1]
        fact_2 = self.tracker[key_2]
        count += 1
        this_loss = self.loss(
            self.predict(fact_1, fact_2, dimension),
            self.gold(row, dimension),
        )
        if log and this_loss != 0:
            print("\t".join([
                str(self.predict(fact_1, fact_2, dimension)),
                str(self.gold(row, dimension)),
                str(this_loss),
                Dimensions.label(dimension),
                repr(fact_1),
                repr(fact_2),
            ]))
        loss += this_loss
    return loss, count
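# score() returns a (total loss, comparable pair count) tuple rather than an
# average, so callers can pool counts across dimensions before dividing.
# A sketch, assuming a PairEvaluator-like receiver named "evaluator":
#
#   total_loss, total_n = 0., 0
#   for d in Dimensions.iter():
#       loss, n = evaluator.score(d)
#       total_loss, total_n = total_loss + loss, total_n + n
#   mae = total_loss / total_n if total_n else 0.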
def build(self, verbose=True):
    for cls in self.cues:
        if verbose:
            print("Gathering", cls.__name__)
        self.cues[cls] = cls()
        self.cues[cls].gather(self.inputs,
                              verbose=verbose,
                              joint_cue=self.cues[JointCue])
    for index in self.inputs.get_kb():
        self[index] = EvidenceWrapper(index, self.cues.values())
    for dimension in Dimensions.iter():
        if verbose:
            print("Normalizing", Dimensions.label(dimension), "evidence")
        self.normalize(dimension, verbose)
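# Note on ordering in build(): each gather() call reads the current value of
# self.cues[JointCue], so JointCue appears to need to be keyed (and gathered)
# before any cue that consumes it. A hypothetical setup, with the constructor
# name assumed:
#
#   evidence = Evidence(inputs)     # hypothetical constructor
#   evidence.build(verbose=True)    # gathers cues, wraps facts, normalizes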
class TrackerFact(Fact):

    header = Fact.header + [
        "{dimension}_{field}".format(
            dimension=Dimensions.label(dimension, slug=True), field=field)
        for dimension in Dimensions.iter()
        for field in ["evidence", "assignment", "confidence"]
    ]

    def __init__(self):
        Fact.__init__(self)
        self.attributes = {
            dimension: {
                field: None
                for field in ["evidence", "assignment", "confidence"]
            }
            for dimension in Dimensions.iter()
        }

    @staticmethod
    def from_fact(fact):
        tracker_fact = TrackerFact()
        tracker_fact.index = fact.index
        tracker_fact.subject = fact.subject
        tracker_fact.property = fact.property
        tracker_fact.modality = fact.modality
        tracker_fact.score = fact.score
        tracker_fact.text = fact.text
        tracker_fact.sense = fact.sense
        tracker_fact.source = fact.source
        return tracker_fact

    def __str__(self):
        return Fact.__str__(self) + "\t" + "\t".join(
            str(self.attributes[dimension][field])
            for dimension in Dimensions.iter()
            for field in ["evidence", "assignment", "confidence"])

    def parse(self, line):
        Fact.parse(self, line)
        split = line.strip().split("\t")
        # Columns 0-7 belong to Fact; the dimension attributes follow as
        # evidence/assignment/confidence triples. Only "assignment" is a
        # boolean; evidence and confidence are floats.
        i = 7
        for dimension in Dimensions.iter():
            for field in ["evidence", "assignment", "confidence"]:
                i += 1
                if field == "assignment":
                    self.attributes[dimension][field] = split[i] == "True"
                else:
                    self.attributes[dimension][field] = float(split[i])
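# Serialization round-trip sketch for TrackerFact: __str__ emits the Fact
# columns followed by one evidence/assignment/confidence triple per dimension,
# and parse() reads the same layout back. The "fact" object is hypothetical:
#
#   tracker_fact = TrackerFact.from_fact(fact)
#   line = str(tracker_fact)    # tab-separated row matching TrackerFact.header
#   restored = TrackerFact()
#   restored.parse(line)        # only valid once the attributes are filled in,
#                               # since float("None") raises ValueError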
def log_true(self, path, kb, taxonomy):
    grouped = self.group()
    concepts = taxonomy.relation._imap
    with codecs.open(path, "w", "utf-8") as file:
        for concept in concepts:
            file.write("# " + concept + "\n\n")
            for d in Dimensions.iter():
                file.write("## " + Dimensions.label(d) + "\n\n")
                for index in concepts[concept]:
                    if grouped[index][d] == self.TRUE:
                        file.write("{}\t{}\n".format(
                            kb[index],
                            self.confidence[Variable(index, d)],
                        ))
                file.write("\n")
            file.write("\n")
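# log_true() writes a markdown-like digest, one section per taxonomy concept
# and one subsection per dimension, listing only facts assigned TRUE, e.g.:
#
#   # <concept>
#
#   ## <dimension label>
#
#   <fact>\t<confidence>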
def pair_evaluator(argv):
    """pair_evaluator arguments:
    <annotation-file> <feature> <confidence> <tracker-file>+
    """
    from dice.evaluation import Tracker, PairEvaluator
    from dice.constants import Dimensions
    annotation_file, feature, confidence, *tracker_files = argv
    PairEvaluator.FEATURE = feature
    PairEvaluator.CONFIDENCE = float(confidence)
    evaluator = PairEvaluator(annotation_file,
                              *[Tracker(f) for f in tracker_files])
    print(" " * 8 + "\t ppref\t size")
    for dimension, results in evaluator.evaluate(details=True).items():
        if dimension == -1:
            # The -1 key aggregates the results over all dimensions.
            print("Overall \t", round(1 - results["mae"], 2),
                  "\t", results["n"])
        else:
            print(Dimensions.label(dimension), "\t",
                  round(1 - results["mae"], 2), "\t", results["n"])
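# A hypothetical invocation, assuming pair_evaluator is dispatched with
# sys.argv-style arguments (the file names are placeholders):
#
#   pair_evaluator(["annotations.tsv", "evidence", "1.0",
#                   "tracker_run1.tsv", "tracker_run2.tsv"])
#   # prints one "ppref / size" row per dimension plus an "Overall" row,
#   # where ppref = 1 - MAE over confidently annotated, tracked pairs.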