def cmiR(x, y, z, k, data, discrete_dist=1):
    '''
    Estimate the conditional mutual information I(x,y|z) with a k-NN
    estimator (from Rahimzamani).

    input:
    x: list of indices for x
    y: list of indices for y
    z: list of indices for z; when empty, the per-point estimates come
       from miPointR instead of cmiPointR
    k: hyper parameter for kNN
    data: pandas dataframe
    discrete_dist: forwarded unchanged to knncmi.getPairwiseDistArray

    output:
    scalar value of I(x,y|z), i.e. the mean of the per-observation
    estimates over all rows of data
    '''
    n_obs, _ = data.shape
    # Pairwise distances are computed once and shared by every point estimate.
    dists = knncmi.getPairwiseDistArray(data, x + y + z, discrete_dist)

    if len(z) == 0:
        # Nothing to condition on: average the plain MI point estimates.
        total = sum(miPointR(i, x, y, k, dists) for i in range(n_obs))
        return total / n_obs

    total = sum(cmiPointR(i, x, y, z, k, dists) for i in range(n_obs))
    return total / n_obs
def cmi4(x, y, z, k, data):
    '''
    Computes Prop, RAVK, FP CMI faster by not redoing computation

    input:
    x: list of indices for x
    y: list of indices for y
    z: list of indices for z; when empty, a constant (all-zero) column is
       appended to a copy of data and used as the conditioning variable
    k: hyper parameter for kNN
    data: pandas dataframe (never modified by this function)

    output:
    tuple: (fp, ravk1, ravk2, prop), values of I(x,y|z), obtained as the
    mean over observations of the cmi4point results
    NOTE(review): averaging with sum(...)/n assumes cmi4point returns a
    value that supports `0 + v` and `v / n` (e.g. a numpy array) - confirm.
    '''
    n, p = data.shape
    if len(z) == 0:
        # Pick a column name that cannot clash with (and therefore cannot
        # overwrite) an existing column of the caller's data.
        const_col = 'z'
        while const_col in data.columns:
            const_col += '_'
        # assign() returns a new frame, so the caller's dataframe is left
        # untouched (the previous in-place `data['z'] = 0` leaked the helper
        # column back to the caller).
        data = data.assign(**{const_col: 0})
        # The new column is appended last; with p original columns its
        # zero-based position is p (p + 1 pointed one past the end).
        z = [p]
    distArray = knncmi.getPairwiseDistArray(data, x + y + z)
    ptEsts = (cmi4point(obs, x, y, z, k, distArray) for obs in range(n))
    return sum(ptEsts) / n
def create_sub_array(dat, var_list, size, seed):
    '''
    Build the pairwise distance array for a seeded resample of dat.

    input:
    dat: pandas dataframe to resample rows from
    var_list: passed to knncmi.getPairwiseDistArray as its second argument
    size: number of row positions to draw
    seed: value used to reseed NumPy's global random state

    output:
    the result of knncmi.getPairwiseDistArray on the resampled rows

    Note: this reseeds the global np.random state as a side effect.
    '''
    np.random.seed(seed)
    n_rows = dat.shape[0]
    # Row positions are drawn with replacement, so rows may repeat.
    sampled_rows = np.random.choice(n_rows, size, replace=True)
    return knncmi.getPairwiseDistArray(dat.iloc[sampled_rows], var_list)