def _init_pipeline(self, folder, subject, candidates):
    pipeline = Pipeline(folder, {
        "embedder_partial_output_path":
            "embedder-tmp-" + slugify(subject) + ".tsv",
        "entailer_partial_output_path":
            "entailer-tmp-" + slugify(subject) + ".tsv",
    })
    # Build a fresh knowledge base from the subject's existing facts plus the
    # candidates; facts are deep-copied so re-indexing does not mutate the
    # originals, and indices are assigned densely from 1.
    kb = KnowledgeBase()
    for fact in list(self.subjects[subject]) + list(candidates.values()):
        copied_fact = copy.deepcopy(fact)
        copied_fact.index = len(kb) + 1
        kb[copied_fact.index] = copied_fact
    pipeline.set_kb(kb)
    return pipeline
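# A minimal standalone illustration of the re-indexing above, with a plain
# dict standing in for KnowledgeBase (illustrative sketch only; the fact
# strings are made up):
#
#   kb = {}
#   for fact in ["fact-a", "fact-b", "fact-c"]:
#       index = len(kb) + 1      # dense, 1-based indices
#       kb[index] = fact
#   # kb == {1: "fact-a", 2: "fact-b", 3: "fact-c"}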
def objective(args):
    trackers = list()
    for folder in self.folders:
        # Merge the tuned arguments with a silenced log.
        pipeline = Pipeline(folder, dict(args, log=""))
        pipeline.load()
        if feature == "evidence":
            pipeline.step_detective()
        if feature == "confidence":
            pipeline.step_assigner()
        tracker = Tracker()
        tracker.build(pipeline)
        trackers.append(tracker)
    PairEvaluator.FEATURE = feature
    return PairEvaluator(self.annotation_file, *trackers).evaluate()
def extend(self, subject, folder, verbose=True):
    if verbose:
        print("Extending '{}'".format(subject))
    candidates = self._generate_candidates(subject, verbose)
    if verbose:
        print("Adding", len(candidates), "facts.")
    if len(candidates) > 0:
        pipeline = self._init_pipeline(folder, subject, candidates)
        self._extend_inputs(subject, pipeline, verbose)
        tracker = self._extract_tracker(pipeline, verbose)
    else:
        pipeline = Pipeline(folder, dict())
        tracker = Tracker()
    # Remove the temporary embedder/entailer outputs, if any.
    for prefix in ("embedder-tmp-", "entailer-tmp-"):
        tmp_path = prefix + slugify(subject) + ".tsv"
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)
    return tracker
def objective(args):
    pipeline = Pipeline(inputs_folder, args)
    Parameters.process(**args)
    if feature == "evidence":
        pipeline.load_detective()  # pipeline.step_detective()
    if feature == "confidence":
        # Re-run the bulk assignment from scratch for this parameter set.
        bulk_pipeline = BulkPipeline(inputs_folder, partition)
        if os.path.isdir(BulkTuner.BULK_TUNER_FOLDER):
            shutil.rmtree(BulkTuner.BULK_TUNER_FOLDER)
        bulk_pipeline.process(BulkTuner.BULK_TUNER_FOLDER, int(n_jobs))
        del bulk_pipeline
        assignment = BulkGatherer(BulkTuner.BULK_TUNER_FOLDER).gather(False)
        pipeline.set_assignment(assignment)
    tracker = Tracker()
    tracker.build(pipeline)
    PairEvaluator.FEATURE = feature
    PairEvaluator.CONFIDENCE = .5
    # Printed for inspection; the plain score is what the tuner optimizes.
    print(PairEvaluator(self.annotation_file, tracker).evaluate(True))
    return PairEvaluator(self.annotation_file, tracker).evaluate()
def __call__(self, worker):
    if os.path.isfile(self.path("assignment.tsv")):
        # This concept has already been processed; skip it.
        return None
    if worker.verbose:
        print(os.getpid(), self.concept, len(self.facts))
    self.parameters["log"] = False
    pipeline = Pipeline(self.path(""), self.parameters)
    # Share the worker's pre-loaded inputs instead of re-reading them.
    pipeline.set_kb(worker.inputs.get_kb().extract(self.facts), save=False)
    pipeline.set_taxonomy(worker.inputs.get_taxonomy(), save=False)
    pipeline.set_similarity_matrix(
        worker.inputs.get_similarity_matrix(), save=False)
    pipeline.set_detective(worker.inputs.get_detective(), save=False)
    pipeline.step_assigner()
    # Keep only the assignments of the central facts.
    central_facts = self.facts[:self.n_central_facts]
    assignment = pipeline.get_assignment()
    for var in list(assignment.map.keys()):
        if var.index not in central_facts:
            del assignment.map[var]
    assignment.save(self.path("assignment.tsv"))
    del pipeline
    return None
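# Dispatch sketch (illustrative; `worker` and `tasks` are assumed names, not
# part of this file): tasks shaped like the __call__ above are meant to be
# drained by a pool worker that holds the shared inputs, e.g.
#
#   for task in tasks:
#       task(worker)
#
# Because each task first checks for an existing assignment.tsv, an
# interrupted bulk run can simply be restarted and will skip finished
# concepts.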
def pre_process(self):
    Pipeline(self.inputs_folder, self.parameters).process(list(range(1, 8)))
def pipeline(argv):
    """pipeline

    arguments:
        <inputs-folder> [parameter=value]*

    options:
        NAME                 DEFAULT VALUE
        log                  False
        verbose              False
        notify               False
        logger_path          log
        kb_path              data/kb.tsv
        entailer_batch_size  100
        entailer_n_jobs      2
        steps                1-8

    parameters:
        SIMILARITY_THRESHOLD = .75
        REMARKABLE_ALPHA = 0.2861481307916379
        TYPICAL_ALPHA = 0.9982891056446265
        TYPICAL_BETA = 0.0009590134436654157
        PLAUSIBLE_ALPHA = 0.3420266845860523
        PLAUSIBLE_BETA = 0.6786119833435241
        EVIDENCE_OFFSET = 1.3396791371188632
        ASSIGNMENT_METHOD = 1
        TAXONOMY_BUILDER = "webisalod"
        TAXONOMY_BUILDER_LOWS_THRESHOLD = 10
        TAXONOMY_BUILDER_EDGE_THRESHOLD = .4
        FUSE_ALPHA = .8
        FUSE_THRESHOLD = .1
        RULE_EXISTENCE_WEIGHT = 10.
        RULE_NOT_PLAUSIBLE_IMPLIES_REMARKABLE_SIBLINGS_WEIGHT = 0.4136813802555934
        RULE_NOT_PLAUSIBLE_IMPLIES_REMARKABLE_WEIGHT = 0.8385061349318154
        RULE_PLAUSIBILITY_INFERENCE_WEIGHT = 0.8899064124827547
        RULE_PLAUSIBILITY_INHERITANCE_WEIGHT = 0.09630439631215232
        RULE_REMARKABILITY_INHERITANCE_WEIGHT = 0
        RULE_REMARKABILITY_SIBLINGS_WEIGHT = 0.01
        RULE_SALIENT_IMPLIES_PLAUSIBLE_WEIGHT = 0.10873639531265827
        RULE_TYPICAL_AND_REMARKABLE_IMPLY_SALIENT_WEIGHT = 0
        RULE_TYPICAL_IMPLIES_PLAUSIBLE_WEIGHT = 0.5446133523460819
        RULE_TYPICAL_PREVENTS_REMARKABLE_SIBLINGS_WEIGHT = 0.0331447944410993
        RULE_TYPICAL_PREVENTS_REMARKABLE_WEIGHT = 0.05332233624133091
        RULE_TYPICALITY_INFERENCE_WEIGHT = 0.42207454477107076
        RULE_TYPICALITY_INHERITANCE_WEIGHT = 0.5266373056914903
        RULE_SIMILARITY_WEIGHT = 0.9843530983393707
        DUMMY_INJECTER_THRESHOLD = 0.0002
        DUMMY_INJECTER_SOURCE = "DUMMY"

    taxonomy_builder:
        {conceptnet|webisalod|wordnet|merged}

    assignment method:
        0: Maximum Satisfiability
        1: Integer Linear Programming

    steps:
        1. Loading knowledge base
        2. Building taxonomy
        3. Embedding facts
        4. Computing similarity matrix
        5. Computing probability
        6. Computing entailment
        7. Computing evidence
        8. Assigning dimensions
    """
    from dice import Pipeline
    inputs_folder, *parameters = argv
    # Split on the first "=" only, so parameter values may contain "=".
    args = {p.split("=", 1)[0]: p.split("=", 1)[1] for p in parameters}
    pipeline = Pipeline(inputs_folder, args)
    steps = list(range(1, 9))
    if "steps" in args:
        # Expand a specification like "1-3,5" to [1, 2, 3, 5].
        steps = []
        for isolated_range in args["steps"].split(","):
            if "-" in isolated_range:
                start, stop = map(int, isolated_range.split("-"))
                steps += list(range(start, stop + 1))
            else:
                steps.append(int(isolated_range))
    pipeline.process(steps)
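# Invocation sketch: pipeline() receives argv as a list, so a call such as
#
#   pipeline(["data/inputs", "steps=2-4,7", "log=True"])
#
# expands the steps string "2-4,7" to [2, 3, 4, 7] and runs only the taxonomy,
# embedding, similarity-matrix, and evidence steps. (How argv reaches this
# function, e.g. from a CLI dispatcher forwarding sys.argv, is assumed here.)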