Beispiel #1
0
def cmiR(x, y, z, k, data, discrete_dist=1):
    '''
    Estimate conditional mutual information I(x,y|z) with the k-NN
    estimator attributed to Rahimzamani.

    input:
    x: list of indices for x
    y: list of indices for y
    z: list of indices for z (when empty, the unconditional per-point
       estimator miPointR is used instead of cmiPointR)
    k: hyper parameter for kNN
    data: pandas dataframe
    discrete_dist: forwarded unchanged to knncmi.getPairwiseDistArray
       (presumably the distance assigned to differing non-numeric
       values -- confirm against that helper)

    output:
    scalar value of I(x,y|z): the mean of the per-observation estimates
    over all rows of data
    '''
    n_obs, _ = data.shape
    # Pairwise distances are computed once and shared by every point estimate.
    dists = knncmi.getPairwiseDistArray(data, x + y + z, discrete_dist)

    if len(z) > 0:
        total = sum(cmiPointR(i, x, y, z, k, dists) for i in range(n_obs))
    else:
        # Nothing to condition on: fall back to plain mutual information.
        total = sum(miPointR(i, x, y, k, dists) for i in range(n_obs))
    return total / n_obs
Beispiel #2
0
def cmi4(x, y, z, k, data):
    '''
    Computes Prop, RAVK, FP CMI faster by not redoing computation

    input:
    x: list of indices for x
    y: list of indices for y
    z: list of indices for z; when empty, a constant placeholder column
       named 'z' is conditioned on instead
    k: hyper parameter for kNN
    data: pandas dataframe (left unmodified; the placeholder column is
       added to a copy)

    output:
    the per-observation results of cmi4point summed over all rows and
    divided by the number of rows; described by the original author as
    (fp, ravk1, ravk2, prop), values of I(x,y|z)
    '''
    n = data.shape[0]
    if len(z) == 0:
        # assign() returns a new frame, so the caller's dataframe does not
        # silently gain (or have overwritten) a 'z' column, and repeated
        # calls see the same column count.
        data = data.assign(z=0)
        # Use the placeholder's actual position rather than a value derived
        # from the original column count (p + 1 pointed past the last
        # column). NOTE(review): assumes getPairwiseDistArray takes 0-based
        # positional column indices, like x and y -- confirm.
        z = [list(data.columns).index('z')]

    distArray = knncmi.getPairwiseDistArray(data, x + y + z)
    ptEsts = (cmi4point(obs, x, y, z, k, distArray) for obs in range(n))
    return sum(ptEsts) / n
Beispiel #3
0
def create_sub_array(dat, var_list, size, seed):
    '''
    Build a pairwise distance array from a bootstrap sample of rows.

    input:
    dat: pandas dataframe to sample rows from
    var_list: passed through to knncmi.getPairwiseDistArray as the
       variables to compute distances for
    size: number of rows to draw (with replacement)
    seed: value used to seed NumPy's global random state; note that this
       reseeds the global generator as a side effect

    output:
    whatever knncmi.getPairwiseDistArray returns for the sampled rows
    '''
    np.random.seed(seed)
    n_rows = dat.shape[0]
    # Positional row numbers, drawn with replacement, so duplicates can occur.
    sampled_rows = np.random.choice(n_rows, size, replace=True)
    return knncmi.getPairwiseDistArray(dat.iloc[sampled_rows], var_list)