def Kolchinsky_estimation(par_object, MI_object, labelprobs, label_indices,
                          higher_lower_flag, par_flag):
    """
    Estimates mutual information using KDE, either in parallel or sequentially.

    par_object: parameter object (output object) whose .dic maps
                (epoch, layer)-style keys to layer activations
    MI_object: mutual information object; .mi_x and .mi_y dicts are filled in
    labelprobs: probabilities of the class labels
    label_indices: dict mapping each class label to the indices of its
                   samples in the dataset
    higher_lower_flag: if truthy, use the upper-bound (KL) KDE estimator,
                       otherwise the lower-bound (BD) estimator
    par_flag: if truthy, delegate to the parallel implementation
    returns: mutual information object

    partly taken from:
    https://github.com/artemyk/ibsgd
    """
    # Small Gaussian noise assumed added to activations for the KDE bounds.
    noise_variance = 1e-3
    # nats to bits conversion factor
    nats2bits = 1.0 / np.log(2)
    # Choose upper (KL) or lower (BD) bound entropy estimator.
    if higher_lower_flag:
        KDE_estimator_func = KDE.entropy_estimator_kl
    else:
        KDE_estimator_func = KDE.entropy_estimator_bd

    if par_flag:
        MI_object.mi_x, MI_object.mi_y = Par_Kolchinsky_estimation(
            par_object, noise_variance, labelprobs, KDE_estimator_func,
            label_indices)
    else:
        for key in par_object.dic.keys():
            T = par_object.dic[key][0]
            # H(T): entropy of the (noisy) layer activity.
            entropy_T = KDE_estimator_func(T, noise_variance)[0]
            # H(T|Y): conditional entropy of layer activity given the output,
            # averaged over classes weighted by their label probabilities.
            entropy_T_giv_Y = 0.
            for i in label_indices.keys():
                entropy_cond = KDE_estimator_func(T[label_indices[i], :],
                                                  noise_variance)[0]
                entropy_T_giv_Y += labelprobs[i] * entropy_cond
            # H(T|X): layer activity given input. This is simply the entropy
            # of the Gaussian noise.
            entropy_T_giv_X = KDE.kde_condentropy(T, noise_variance)

            # I(X;T) = H(T) - H(T|X), I(Y;T) = H(T) - H(T|Y), in bits.
            MI_object.mi_x[key] = nats2bits * (entropy_T - entropy_T_giv_X)
            MI_object.mi_y[key] = nats2bits * (entropy_T - entropy_T_giv_Y)
            # Progress log once per epoch (first layer only), throttled to
            # early epochs and every 50th epoch thereafter.
            if key[1] == 1 and (key[0] % 50 == 0 or key[0] <= 30):
                print("calculated KDE MI_X and MI_Y for epoch:", key[0])
    return MI_object
def Par_Kolchinsky_estimation(par_object, noise_variance, labelprobs, function,
                              label_indices):
    """
    Parallel KDE estimation; computes mutual information per key.

    par_object: parameter object (output object) whose .dic maps keys to
                layer activations
    noise_variance: variance of the added Gaussian noise
    labelprobs: probabilities of the class labels
    function: upper- or lower-bound KDE entropy estimator
    label_indices: dict mapping each class label to the indices of its
                   samples in the dataset
    returns: two dictionaries (mi_x, mi_y) with mutual information in bits
    """
    print("Starting Kolchinsky calculation for MI in parallel")
    # nats to bits conversion factor
    nats2bits = 1.0 / np.log(2)
    dic_x = {}
    dic_y = {}
    # Reuse one worker pool across all keys instead of spawning per key.
    with Parallel(n_jobs=CPUS) as parallel:
        for key in par_object.dic.keys():
            T = par_object.dic[key][0]
            # H(T): entropy of the (noisy) layer activity.
            entropy_T = function(T, noise_variance)[0]
            # H(T|Y): per-class conditional entropies computed in parallel;
            # each helper call returns its class term already weighted by
            # labelprobs (see Kolchinsky_par_helper).
            entropy_T_giv_Y_array = np.array(
                parallel(
                    delayed(Kolchinsky_par_helper)
                    (T[label_indices[i], :], noise_variance, labelprobs, i,
                     function) for i in label_indices.keys()))
            entropy_T_giv_Y = np.sum(entropy_T_giv_Y_array)

            # H(T|X): layer activity given input. This is simply the entropy
            # of the Gaussian noise.
            entropy_T_giv_X = KDE.kde_condentropy(T, noise_variance)

            # I(X;T) and I(Y;T) in bits.
            dic_x[key] = nats2bits * (entropy_T - entropy_T_giv_X)
            dic_y[key] = nats2bits * (entropy_T - entropy_T_giv_Y)

    return dic_x, dic_y