def __refresh_statistics(self):
    '''
    Recompute the cached statistics of this rule from its covered examples.

    Always updates ``coverage`` and ``score``. For ranked targets it also
    sets ``mean`` and ``sd``; otherwise it rebuilds ``distribution``, a
    mapping from score value to the number of covered examples having it.
    '''
    covered = self.covered_examples
    self.coverage = covered.count()

    # Translate the covered-example bitset into the examples' scores.
    kb = self.kb
    covered_scores = [kb.get_score(i) for i in kb.bits_to_indices(covered)]

    if self.target_type == Example.Ranked:
        self.mean = avg(covered_scores)
        self.sd = std(covered_scores)
    else:
        histogram = self.distribution = defaultdict(int)
        for value in covered_scores:
            histogram[value] += 1

    # Scoring happens last in either case: score_fun receives this rule,
    # presumably reading the statistics refreshed above.
    self.score = self.kb.score_fun(self)
def __init__(self, triplets, score_fun, instances_as_leaves=True):
    '''
    Build the knowledge base from the given triplet graph.

    triplets            -- triplet graph; the examples are built from it and
                           it is passed on to several of the build steps.
    score_fun           -- kept as ``self.score_fun``.
    instances_as_leaves -- kept as ``self.instances_as_leaves``
                           (defaults to True).

    The target type (ranked or class-labeled) is taken from the first
    example. Summary statistics over all examples are then computed:
    ``mean`` and ``sd`` for ranked data, ``distribution`` otherwise.
    '''
    self.instances_as_leaves = instances_as_leaves
    self.score_fun = score_fun

    # Empty containers; presumably populated by the build steps below.
    self.sub_class_of = defaultdict(list)
    self.super_class_of = defaultdict(list)
    self.annotation_name = defaultdict(list)
    self.predicates = set()
    self.binary_predicates = set()
    self.class_values = set()

    examples, annotations = self._build_examples(triplets)
    self.examples = examples

    # Ranked or class-labeled data
    self.target_type = examples[0].target_type

    # Build steps, kept in their original order.
    self._build_subclassof(triplets)
    self._calc_predicate_members(triplets)
    self._find_roots(annotations)
    self._calc_members_closure()
    self._calc_binary_members()
    self._propagate_annotation_names(triplets)

    # Statistics
    example_scores = [ex.score for ex in self.examples]
    if self.target_type == Example.Ranked:
        self.mean = avg(example_scores)
        self.sd = std(example_scores)
    else:
        histogram = self.distribution = defaultdict(int)
        for value in example_scores:
            histogram[value] += 1
        logger.debug('Class distribution: %s' % str(self.distribution))
def __init__(self, triplets, score_fun, instances_as_leaves=True):
    """
    Set up the knowledge base from the given triplet graph.

    triplets            -- triplet graph; used to build the examples and
                           handed to several of the build steps.
    score_fun           -- stored unchanged on ``self.score_fun``.
    instances_as_leaves -- stored unchanged on ``self.instances_as_leaves``
                           (defaults to True).

    The first example determines the target type (ranked or
    class-labeled). Afterwards overall statistics are computed from every
    example: ``mean``/``sd`` when ranked, else a ``distribution`` count.
    """
    self.instances_as_leaves = instances_as_leaves
    self.score_fun = score_fun

    # Start from empty containers; presumably the build steps fill them.
    self.sub_class_of = defaultdict(list)
    self.super_class_of = defaultdict(list)
    self.annotation_name = defaultdict(list)
    self.predicates = set()
    self.binary_predicates = set()
    self.class_values = set()

    built_examples, found_annotations = self._build_examples(triplets)
    self.examples = built_examples

    # Ranked or class-labeled data
    self.target_type = built_examples[0].target_type

    # The build steps run in the same order as before.
    self._build_subclassof(triplets)
    self._calc_predicate_members(triplets)
    self._find_roots(found_annotations)
    self._calc_members_closure()
    self._calc_binary_members()
    self._propagate_annotation_names(triplets)

    # Statistics
    all_scores = [ex.score for ex in self.examples]
    if self.target_type == Example.Ranked:
        self.mean = avg(all_scores)
        self.sd = std(all_scores)
    else:
        counts = self.distribution = defaultdict(int)
        for score in all_scores:
            counts[score] += 1
        logger.debug("Class distribution: %s" % str(self.distribution))