def run(kwargs, cli=False):
    '''
    Runs one complete experiment: builds the graph and the knowledge base,
    runs the learner, and outputs the results.

    kwargs is a dict of settings. The keys read directly here are
    'verbose' (only when cli is true), 'score', 'leaves', 'adjust',
    'covered', 'output' and 'uris'; the dict is also passed unchanged to
    build_graph, run_learner, generate_rules_report and _parameters_report.

    When cli is true the logger level follows kwargs['verbose'], and if no
    output file is given both reports are printed to stdout. Otherwise the
    logger level is reset to NOTSET and nothing is printed.

    Returns whatever run_learner returned.
    '''
    if cli:
        logger.setLevel(logging.DEBUG if kwargs['verbose'] else logging.INFO)
    else:
        logger.setLevel(logging.NOTSET)

    logger.info('Starting Hedwig')
    start = time.time()
    start_date = datetime.now().isoformat()

    graph = build_graph(kwargs)

    logger.info('Building the knowledge base')
    score_func = getattr(scorefunctions, kwargs['score'])
    kb = ExperimentKB(graph, score_func, instances_as_leaves=kwargs['leaves'])

    validator = Validate(kb, significance_test=significance.apply_fisher,
                         adjustment=getattr(adjustment, kwargs['adjust']))

    rules_per_target = run_learner(kwargs, kb, validator)
    rules_report = generate_rules_report(kwargs, rules_per_target)

    time_taken = time.time() - start
    # Lazy %-args: the message is only formatted if the record is emitted.
    logger.info('Finished in %d seconds', time_taken)

    logger.info('Outputing results')

    covered_file = kwargs['covered']
    if covered_file:
        # Serialise first, open second: opening with 'w' truncates the file,
        # so a failure while building the JSON must happen before that.
        examples = Rule.ruleset_examples_json(rules_per_target)
        covered_json = json.dumps(examples, indent=2)
        with open(covered_file, 'w') as f:
            f.write(covered_json)

    parameters_report = _parameters_report(kwargs, start_date, time_taken)

    rules_out_file = kwargs['output']
    if rules_out_file:
        if rules_out_file.endswith('json'):
            # Same ordering concern as above: build the payload before the
            # target file is truncated.
            chunks = [Rule.to_json(rules_per_target, show_uris=kwargs['uris'])]
        else:
            chunks = [parameters_report, rules_report]

        with open(rules_out_file, 'w') as f:
            for chunk in chunks:
                f.write(chunk)
    elif cli:
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid syntax on Python 3.
        print(parameters_report)
        print(rules_report)

    return rules_per_target
def run(kwargs, cli=False):
    '''
    Runs one complete experiment: builds the graph and the knowledge base,
    runs the learner, and outputs the results.

    kwargs is a dict of settings. The keys read directly here are
    'verbose' (only when cli is true), 'score', 'leaves', 'adjust',
    'covered', 'output' and 'uris'; the dict is also passed unchanged to
    build_graph, run_learner, generate_rules_report and _parameters_report.

    When cli is true the logger level follows kwargs['verbose'], and if no
    output file is given both reports are printed to stdout. Otherwise the
    logger level is reset to NOTSET and nothing is printed.

    Returns whatever run_learner returned.
    '''
    if cli:
        logger.setLevel(logging.DEBUG if kwargs['verbose'] else logging.INFO)
    else:
        logger.setLevel(logging.NOTSET)

    logger.info('Starting Hedwig')
    start = time.time()
    start_date = datetime.now().isoformat()

    graph = build_graph(kwargs)

    logger.info('Building the knowledge base')
    score_func = getattr(scorefunctions, kwargs['score'])
    kb = ExperimentKB(graph, score_func, instances_as_leaves=kwargs['leaves'])

    validator = Validate(kb, significance_test=significance.apply_fisher,
                         adjustment=getattr(adjustment, kwargs['adjust']))

    rules_per_target = run_learner(kwargs, kb, validator)
    rules_report = generate_rules_report(kwargs, rules_per_target)

    time_taken = time.time() - start
    # Lazy %-args: the message is only formatted if the record is emitted.
    logger.info('Finished in %d seconds', time_taken)

    logger.info('Outputing results')

    covered_file = kwargs['covered']
    if covered_file:
        # Serialise first, open second: opening with 'w' truncates the file,
        # so a failure while building the JSON must happen before that.
        examples = Rule.ruleset_examples_json(rules_per_target)
        covered_json = json.dumps(examples, indent=2)
        with open(covered_file, 'w') as f:
            f.write(covered_json)

    parameters_report = _parameters_report(kwargs, start_date, time_taken)

    rules_out_file = kwargs['output']
    if rules_out_file:
        if rules_out_file.endswith('json'):
            # Same ordering concern as above: build the payload before the
            # target file is truncated.
            chunks = [Rule.to_json(rules_per_target, show_uris=kwargs['uris'])]
        else:
            chunks = [parameters_report, rules_report]

        with open(rules_out_file, 'w') as f:
            for chunk in chunks:
                f.write(chunk)
    elif cli:
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid syntax on Python 3.
        print(parameters_report)
        print(rules_report)

    return rules_per_target
def induce(self):
    '''
    Induces rules for the given knowledge base.

    A single seed rule is built from the knowledge base's root predicate
    and this learner's target, handed to __induce_level, and the rules that
    come back are filtered through `interesting`.
    '''
    kb = self.kb
    root = kb.get_root()
    seed_rule = Rule(kb, predicates=[root], target=self.target)

    candidates = self.__induce_level([seed_rule])

    # NOTE(review): on Python 3 filter() yields a lazy iterator rather than
    # a list - confirm callers cope before porting this module.
    return filter(interesting, candidates)
def generate_rules_report(kwargs, rules_per_target,
                          human=lambda label, rule: label):
    '''
    Builds one textual report covering the rule sets of all targets.

    rules_per_target is iterated as pairs whose second item is a rule
    collection; empty (falsy) collections are skipped. For every remaining
    collection the output of Rule.ruleset_report (called with
    show_uris=kwargs['uris'] and the given human callable) is appended,
    followed by a newline.

    The default human callable returns the label it is given unchanged.

    Returns the concatenated report, or 'No significant rules found' when
    nothing was reported.
    '''
    sections = [
        Rule.ruleset_report(rules, show_uris=kwargs['uris'], human=human) + '\n'
        for _, rules in rules_per_target
        if rules
    ]
    return ''.join(sections) or 'No significant rules found'
def induce(self):
    '''
    Induces rules for the given knowledge base.

    Every combination of 1 up to self.depth predicates, drawn from the
    knowledge base predicates that have at least self.min_sup members, is
    turned into a Rule for self.target.

    Returns the rules sorted by descending score, cut off after the first
    self.n entries.
    '''
    kb = self.kb

    # Keep the frequent predicates in a real list: the sequence is walked
    # once per depth below, and the lazy object filter() returns on
    # Python 3 would already be exhausted after the first depth.
    all_predicates = [pred for pred in kb.predicates
                      if kb.get_members(pred).count() >= self.min_sup]

    rules = []
    for depth in range(1, self.depth + 1):
        for attrs in combinations(all_predicates, depth):
            rule = Rule(kb,
                        predicates=self._labels_to_predicates(attrs),
                        target=self.target)
            rules.append(rule)

    # sorted() is stable, so equally scored rules keep generation order.
    rules = sorted(rules, key=lambda r: r.score, reverse=True)
    return rules[:self.n]