def tryModel(self, model):
    """Check ``dkl`` of ``model`` against a randomly perturbed copy of itself.

    A copy of ``model`` is converted with ``CBN.from_bn``, one randomly
    chosen variable's factor is replaced by a CPT built from fresh random
    data, and the following are asserted:

    * ``dkl(model, model)`` and ``dkl(cbn, cbn)`` are finite;
    * ``dkl(model, cbn)`` is finite and not less than ``dkl(model, model)``;
    * ``dkl(cbn, model)`` is not less than ``dkl(cbn, cbn)``.

    NOTE(review): ``dkl`` is presumably a KL-divergence; confirm against
    its definition.
    """
    # Baseline: the model compared with itself.
    kl = dkl(model, model)
    self.assertTrue(is_finite(kl))

    # Work on a copy (with its own domain) so ``model`` is left untouched.
    cbn = CBN.from_bn(model.copy(copy_domain=True))
    v = choice(tuple(cbn.variables()))
    f = cbn[v]

    # Draw replacement data; wherever a new value coincides with the old
    # one (to 4 decimal places) push it away so the factor really changes.
    dat = rand_factor_data(len(f.data()))
    for i, (a, b) in enumerate(zip(f.data(), dat)):
        if round(a - b, 4) == 0:
            dat[i] += uniform(1.0, 100.0)

    cbn._replace_factor(
        v,
        CPT(
            Factor(variables=f.variables(), data=dat, domain=cbn),
            v,
            cpt_force=True,
        ),
    )

    # Original model versus the perturbed one.
    ikl = dkl(model, cbn)
    self.assertTrue(is_finite(ikl))
    self.assertTrue(ikl >= kl)

    # Same comparison in the other direction. Finiteness of ``ikl_`` is
    # deliberately not asserted, matching the original checks.
    kl = dkl(cbn, cbn)
    self.assertTrue(is_finite(kl))
    ikl_ = dkl(cbn, model)
    self.assertTrue(ikl_ >= kl)
def statistic(self, joint_counts, marginal_counts_x, marginal_counts_y, marginal_counts_s):
    r"""Return the G^2 statistic for a conditional independence hypothesis.

    Hypothesis: P(X,Y|Z) = P(X|Z) P(Y|Z)

    Using G^2 = 2 sum_{i \in instances} x_i ln (x_i / e_i)
    e_i = x_{i+k}x_{+jk} / x_{++k}

    All four arguments must support ``*`` and ``/`` with each other; the
    results must provide ``map(func)`` and ``data()``.
    NOTE(review): ``marginal_counts_s`` presumably holds the counts of the
    conditioning set written as Z above; confirm against the caller.

    Non-finite terms are left out of the sum.
    """
    # Expected number of observations assuming the variables are
    # independent.
    expected_counts = (marginal_counts_x * marginal_counts_y) / marginal_counts_s

    # The ln (x_i / e_i) term. The return value of ``map`` is ignored, so
    # this relies on ``map`` updating the object in place.
    log_exp_joint_counts = joint_counts / expected_counts
    log_exp_joint_counts.map(rlog)

    # The remaining x_i * ln(...) product; non-finite entries are skipped
    # when summing.
    g2_terms = joint_counts * log_exp_joint_counts
    return 2 * sum(x for x in g2_terms.data() if is_finite(x))