def _get_JS(p, p_weights, q, q_weights, binning, base=2):
    if isinstance(binning, int):
        # if only the number of bins was specified, generate an actual binning spanning both samples
        binning = np.linspace(min(np.min(p), np.min(q)), max(np.max(p), np.max(q)), num=binning, endpoint=True)

    # first, bin p and q to get two "probability vectors" that can be compared directly
    p_binned, _ = np.histogram(np.clip(p, binning[0], binning[-1]), bins=binning, weights=p_weights, density=True)
    q_binned, _ = np.histogram(np.clip(q, binning[0], binning[-1]), bins=binning, weights=q_weights, density=True)

    # make sure they do not contain negative entries
    p_binned = np.maximum(p_binned, 0.0)
    q_binned = np.maximum(q_binned, 0.0)

    # renormalize
    p_binned /= np.sum(p_binned)
    q_binned /= np.sum(q_binned)

    # this code is taken (almost) verbatim from https://github.com/scipy/scipy/blob/c42462a/scipy/spatial/distance.py#L1239-L1296
    m_binned = (p_binned + q_binned) / 2.0
    left = rel_entr(p_binned, m_binned)
    right = rel_entr(q_binned, m_binned)

    js = np.sum(left, axis=0) + np.sum(right, axis=0)

    if base is not None:
        js /= np.log(base)

    return js
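
# A minimal usage sketch for _get_JS() above (not from the original source); it assumes
# numpy is imported as np and rel_entr comes from scipy.special, as the function does.
# The sample arrays, weights, and bin count are invented for illustration.
import numpy as np
from scipy.special import rel_entr

rng = np.random.default_rng(0)
p_samples = rng.normal(0.0, 1.0, size=1000)
q_samples = rng.normal(0.5, 1.2, size=1000)
weights = np.ones(1000)
# passing an int asks the function to build a common binning over both samples
print(_get_JS(p_samples, weights, q_samples, weights, binning=20, base=2))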
Example #2
def jensenshannon(p, q, base=None):
    """
    Compute the Jensen-Shannon distance (metric) between
    two 1-D probability arrays. This is the square root
    of the Jensen-Shannon divergence.

    The Jensen-Shannon distance between two probability
    vectors `p` and `q` is defined as,

    .. math::

       \\sqrt{\\frac{D(p \\parallel m) + D(q \\parallel m)}{2}}

    where :math:`m` is the pointwise mean of :math:`p` and :math:`q`
    and :math:`D` is the Kullback-Leibler divergence.

    This routine will normalize `p` and `q` if they don't sum to 1.0.

    Parameters
    ----------
    p : (N,) array_like
        left probability vector
    q : (N,) array_like
        right probability vector
    base : double, optional
        the base of the logarithm used to compute the output
        if not given, then the routine uses the default base of
        scipy.stats.entropy.

    Returns
    -------
    js : double
        The Jensen-Shannon distance between `p` and `q`

    .. versionadded:: 1.2.0

    Examples
    --------
    >>> from scipy.spatial import distance
    >>> distance.jensenshannon([1.0, 0.0, 0.0], [0.0, 1.0, 0.0], 2.0)
    1.0
    >>> distance.jensenshannon([1.0, 0.0], [0.5, 0.5])
    0.46450140402245893
    >>> distance.jensenshannon([1.0, 0.0, 0.0], [1.0, 0.0, 0.0])
    0.0

    """
    p = np.asarray(p)
    q = np.asarray(q)
    p = p / np.sum(p, axis=0)
    q = q / np.sum(q, axis=0)
    m = (p + q) / 2.0
    left = rel_entr(p, m)
    right = rel_entr(q, m)
    js = np.sum(left, axis=0) + np.sum(right, axis=0)
    if base is not None:
        js /= np.log(base)
    return np.sqrt(js / 2.0)
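
# A quick sketch (not part of the original) exercising the docstring examples above;
# the printed values should match the documented ones, since this function mirrors
# scipy.spatial.distance.jensenshannon.
import numpy as np
from scipy.special import rel_entr

print(jensenshannon([1.0, 0.0, 0.0], [0.0, 1.0, 0.0], 2.0))  # 1.0
print(jensenshannon([1.0, 0.0], [0.5, 0.5]))                 # ~0.4645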
def jensenshannon(p, q):
    p = np.asarray(p)
    q = np.asarray(q)
    p = p / np.sum(p, axis=0)
    q = q / np.sum(q, axis=0)
    m = (p + q) / 2.0
    left = rel_entr(p, m)
    right = rel_entr(q, m)
    js = np.sum(left, axis=0) + np.sum(right, axis=0)
    return np.sqrt(js / 2.0)
Example #4
def js(p, q):
    """Calculate Jensen-Shannon Distance between ground truth 
    array p and privatized array q.
    """
    m = (p + q) / 2.0
    left = rel_entr(p, m)
    right = rel_entr(q, m)
    js = np.sum(left, axis=0) + np.sum(right, axis=0)
    js /= np.log(2)
    return np.sqrt(js / 2.0)
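
# Usage sketch for js() above (added for illustration): with the log(2) scaling the
# distance between two disjoint distributions comes out as 1.0. Assumes numpy as np
# and scipy.special.rel_entr are already imported.
import numpy as np
from scipy.special import rel_entr

print(js(np.array([1.0, 0.0]), np.array([0.0, 1.0])))  # expected: 1.0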
Example #5
def js_div(px, py):
    """
    Jensen-Shannon Divergence, which is a smoothed version of KL divergence.
    
    px: Probability of x (float or array of floats)
    py: Probability of y (float or array of floats)
    """
    midpoint = (px + py) * 0.5
    js = rel_entr(px, midpoint) * 0.5 + rel_entr(py, midpoint) * 0.5
    return np.sum(js)
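
# Usage sketch for js_div() above (not from the original); for two already-normalized
# vectors the result is the JS divergence in nats, bounded above by log(2) ~= 0.693.
import numpy as np
from scipy.special import rel_entr

px = np.array([0.10, 0.40, 0.50])
py = np.array([0.80, 0.15, 0.05])
print(js_div(px, py))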
def jensenshannon(p, q, base=None):
    p = np.asarray(p)
    q = np.asarray(q)
    p = p / np.sum(p, axis=0)
    q = q / np.sum(q, axis=0)
    m = (p + q) / 2.0
    left = rel_entr(p, m)
    right = rel_entr(q, m)
    js = np.sum(left, axis=0) + np.sum(right, axis=0)
    if base is not None:
        js /= np.log(base)
    return np.sqrt(js / 2.0)
Example #7
def kl_divergence(p, q, axis=0):
    """Compute KL divergence (in bits) between p and q, DKL(P||Q)."""
    p = np.asarray(p)
    p = 1.0 * p / np.sum(p, axis=axis, keepdims=True)
    q = np.asarray(q)
    q = 1.0 * q / np.sum(q, axis=axis, keepdims=True)
    return np.sum(rel_entr(p, q), axis=axis) / np.log(2)
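
# Usage sketch for kl_divergence() above, showing the usual asymmetry of the measure;
# the two distributions are the same ones used in the standalone scipy example further down.
import numpy as np
from scipy.special import rel_entr

p = [0.10, 0.40, 0.50]
q = [0.80, 0.15, 0.05]
print(kl_divergence(p, q))  # D_KL(P||Q) in bits
print(kl_divergence(q, p))  # D_KL(Q||P) in bits, generally a different value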
Example #8
def jensenshannon(p, q, base=None):
    """
    Returns the JS divergence between two 1-dimensional probability vectors;
    code taken from scipy and modified to fix a bug
    """
    p = np.asarray(p)
    q = np.asarray(q)
    p = p / np.sum(p, axis=0)
    q = q / np.sum(q, axis=0)
    m = (p + q) / 2.0
    left = rel_entr(p, m)
    right = rel_entr(q, m)
    js = max(0, np.sum(left, axis=0) + np.sum(right, axis=0))
    if base is not None:
        js /= np.log(base)
    return np.sqrt(js / 2.0)
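
# A hedged note on the max(0, ...) guard above (illustration only): for nearly identical
# inputs the summed rel_entr terms can round to a tiny negative number, and np.sqrt of a
# negative value is NaN, so clamping at zero keeps the result a valid distance.
import numpy as np
from scipy.special import rel_entr

p = np.array([0.3, 0.3, 0.4])
q = np.array([0.3 + 1e-12, 0.3, 0.4 - 1e-12])
print(jensenshannon(p, q, base=2))  # essentially 0.0, and never NaN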
Example #9
def calc_kl(df, pop, stat, col2):
    """Compare observed and simulated probability distributions.

    https://machinelearningmastery.com/divergence-between-probability-distributions/

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with "pops" and "df_id" ("obs"/"sim") columns plus the
        columns named by `stat` and `col2`.
    pop : str
        Population label used to subset the "pops" column.
    stat : str
        Name of the column holding the statistic identifiers to iterate over.
    col2 : str
        Name of the column holding the probability values to compare.

    Returns
    -------
    None. Prints the relative entropy, KL divergence, and Jensen-Shannon distance
    for each statistic.

    """
    dfpop = df[df["pops"] == pop]
    stats_list = dfpop[stat].unique()
    for i in stats_list:
        obs = dfpop[(dfpop[stat] == i) & (dfpop["df_id"] == "obs")]
        sim = dfpop[(dfpop[stat] == i) & (dfpop["df_id"] == "sim")]
        ent = sum(rel_entr(obs[col2].values, sim[col2].values))
        kl = sum(kl_div(obs[col2].values, sim[col2].values))
        js = jensenshannon(obs[col2].values, sim[col2].values, base=2)
        print(f"{stat} {i}: rel_entr {ent}, KL_div {kl}, js_dist {js} bits")
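
# A toy sketch of the dataframe layout calc_kl() appears to expect; the column name
# "prob", the statistic label "het", and all values here are invented, and the call is
# left commented out because it also relies on rel_entr, kl_div and jensenshannon being
# imported at module level.
import pandas as pd

toy = pd.DataFrame({
    "pops": ["A"] * 4,
    "df_id": ["obs", "obs", "sim", "sim"],
    "stat": ["het", "het", "het", "het"],
    "prob": [0.6, 0.4, 0.5, 0.5],
})
# calc_kl(toy, pop="A", stat="stat", col2="prob")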
Example #10
    def objective_log_linear(weights):
        """General objective function for log-linear pooling
        (Abbas 2009 (9))

        Parameters
        ----------

        weights : numeric or array_like
            Input data.

        Returns
        -------

        result : float
            Negative of the objective value: the weighted KL payoff plus the
            penalty for violating sum(weights) == 1.

        """

        # Compute log-linear pooled prob with given weights
        pooling_pooled, pooling_reg_const = log_linear_pooling(P, weights)

        # Compute log-linear payoff (Abbas (9)) (here higher is worse)
        kls = np.zeros(nviews)
        pooling_pooled_p = 1.0 * pooling_pooled / np.sum(pooling_pooled)
        for i, qk in enumerate(P):
            qk = 1.0 * qk / np.sum(qk)
            vec = rel_entr(pooling_pooled_p, qk)
            kls[i] = np.sum(vec)

        payoff = np.sum(np.dot(kls, weights))

        # Introduce constraint sum(weights)=1 through a penalty
        penalty = abs(1 - np.sum(weights))
        goal = payoff + penalty
        return (-goal)
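
# Background sketch (not the author's helper): log-linear pooling combines K probability
# vectors as a normalized weighted geometric mean, which is presumably what the
# log_linear_pooling() call above computes; this standalone version is only illustrative.
import numpy as np

def log_linear_pool(P, weights):
    """P: (K, n) array of probability vectors; weights: (K,) pooling weights."""
    pooled = np.prod(P ** weights[:, None], axis=0)  # weighted geometric mean, elementwise
    return pooled / np.sum(pooled)

P = np.array([[0.2, 0.5, 0.3],
              [0.1, 0.6, 0.3]])
print(log_linear_pool(P, np.array([0.5, 0.5])))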
Example #11
def cost_KL(relevance, freq_lists, ideal_proportions_lists, weight_list,
            doc_list):
    cost = weight_list[0] * relevance  #relevance is normalized between 0 and 1
    proportion_lists = []
    new_freq_lists = []
    for i in range(len(freq_lists)):
        new_freq_lists.append(freq_lists[i].copy())
        for j in range(len(new_freq_lists[i])):
            new_freq_lists[i][j] += doc_list[i][j]

    for fl in new_freq_lists:
        pl = []
        for freq in fl:
            if sum(fl) > 0:
                pl.append(freq / sum(fl))
            else:
                pl.append(freq)
        proportion_lists.append(pl)
    #print("Doc List: " + str(doc_list))
    #print("New Freq Lists: " + str(new_freq_lists))
    #print("Prop lists: " + str(proportion_lists))
    for i, pl in enumerate(proportion_lists):
        #print("rel_entr for " + str(pl) + str(sum(special.rel_entr(pl, ideal_proportions_lists[i]))))
        #print("weight: " + str(weight_list[i + 1]))
        cost += sum(special.rel_entr(pl, ideal_proportions_lists[i])
                    ) * weight_list[i + 1]  # +1 since relevance is weight[0]
    #print("Cost: " + str(cost) + "\n")
    return cost
def get_kl_divergence(input1, input2):
    c1, c2 = collections.Counter(input1), collections.Counter(input2)
    d1, d2 = [], []
    for key in c1.keys():
        d1.append(c1[key] / len(input1))
        d2.append(c2[key] / len(input2))
    return sum(sp.rel_entr(d1, d2))
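
# Usage sketch for get_kl_divergence() above (illustrative inputs); note that it builds
# the support from the keys of input1, so input2 should cover those keys or the result
# can be infinite.
import collections
import scipy.special as sp

print(get_kl_divergence("aabbc", "abbcc"))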
def get_kld(dat, ref, dist_name, bins=75):
    """
    Find the Kullback-Leibler divergence from ~ref~ to ~dat~ using SciPy's rel_entr()
    function. A hypothesized distribution is required, as this function fits both the
    ref and the data to the same probability distribution (fitted separately):
    ~dist_name~ should be one of SciPy's probability distributions
    (https://docs.scipy.org/doc/scipy/reference/stats.html).
    """
    dist = getattr(stats, dist_name)

    y, x = np.histogram(ref, bins=bins)
    x = (x + np.roll(x, -1))[:-1] / 2.0
    #y = y/np.sum(y)

    d_params = dist.fit(dat)
    d_args = d_params[:-2]
    d_loc = d_params[-2]
    d_scale = d_params[-1]
    dpdf = dist.pdf(x, loc=d_loc, scale=d_scale, *d_args)

    r_params = dist.fit(ref)
    r_args = r_params[:-2]
    r_loc = r_params[-2]
    r_scale = r_params[-1]
    rpdf = dist.pdf(x, loc=r_loc, scale=r_scale, *r_args)

    # dw = dist.fit(dat) # returns params
    # rw = dist.fit(ref)

    # dpdf = dist.pdf(x, c = dw[0], loc = dw[1], scale = dw[2])
    # rpdf = dist.pdf(x, c = rw[0], loc = rw[1], scale = rw[2])

    dy = dpdf / np.sum(dpdf)
    ry = rpdf / np.sum(rpdf)

    return sum(rel_entr(dy, ry))
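
# Usage sketch for get_kld() above, assuming the module-level imports it relies on
# (numpy as np, scipy.stats as stats, scipy.special.rel_entr); the data are synthetic
# normal samples made up for illustration.
import numpy as np
from scipy import stats
from scipy.special import rel_entr

rng = np.random.default_rng(1)
ref = rng.normal(0.0, 1.0, size=2000)
dat = rng.normal(0.3, 1.1, size=2000)
print(get_kld(dat, ref, "norm"))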
Example #14
    def metric(real, synthetic):
        """
        This approximates the KL divergence by binning the continuous values
        to turn them into categorical values and then computing the relative
        entropy.

        TODO:
            * Investigate a KDE-based approach.

        Arguments:
            real (np.ndarray): The values from the real database.
            synthetic (np.ndarray): The values from the synthetic database.

        Returns:
            (str, Goal, str, tuple): A tuple containing (value, goal, unit, domain)
            which corresponds to the fields in a Metric object.
        """
        real[np.isnan(real)] = 0.0
        synthetic[np.isnan(synthetic)] = 0.0

        real, xedges, yedges = np.histogram2d(real[:, 0], real[:, 1])
        synthetic, _, _ = np.histogram2d(synthetic[:, 0],
                                         synthetic[:, 1],
                                         bins=[xedges, yedges])

        f_obs, f_exp = synthetic.flatten() + 1e-5, real.flatten() + 1e-5
        f_obs, f_exp = f_obs / np.sum(f_obs), f_exp / np.sum(f_exp)

        value = np.sum(rel_entr(f_obs, f_exp))
        return value, Goal.MINIMIZE, "entropy", (0.0, float("inf"))
Example #15
    def violation(self, norm_ord=np.inf, rough=False):
        """
        Return a measure of violation for the constraint that ``self.v`` belongs to
        :math:`C_{\\mathrm{SAGE}}(\\alpha, X)^{\\dagger}`.

        Parameters
        ----------
        norm_ord : int
            The value of ``ord`` passed to numpy ``norm`` functions, when reducing
            vector-valued residuals into a scalar residual.

        rough : bool
            Setting ``rough=False`` computes violation by solving an optimization
            problem. Setting ``rough=True`` computes violation by taking norms of
            residuals of appropriate elementwise equations and inequalities involving
            ``self.v`` and auxiliary variables.

        Notes
        -----
        When ``rough=False``, the optimization-based violation is computed by projecting
        the vector ``self.v`` onto a new copy of a dual SAGE constraint, and then returning
        the L2-norm between ``self.v`` and that projection. This optimization step essentially
        re-solves for all auxiliary variables used by this constraint.
        """
        v = self.v.value
        viols = []
        for i in self.ech.U_I:
            selector = self.ech.expcovers[i]
            num_cover = self.ech.expcover_counts[i]
            if num_cover > 0:
                expr1 = np.tile(v[i], num_cover).ravel()
                expr2 = v[selector].ravel()
                lowerbounds = special_functions.rel_entr(expr1, expr2)
                mat = -(self.alpha[selector, :] - self.alpha[i, :])
                mu_i = self._lifted_mu_vars[i].value
                # compute rough violation for this dual AGE cone
                residual = mat @ mu_i[:self._n] - lowerbounds
                residual[residual >= 0] = 0
                curr_viol = np.linalg.norm(residual, ord=norm_ord)
                if (self.X is not None) and (not np.isnan(curr_viol)):
                    AbK_val = self.X.A @ mu_i + v[i] * self.X.b
                    AbK_viol = PrimalProductCone.project(AbK_val, self.X.K)
                    curr_viol += AbK_viol
                # as applicable, solve an optimization problem to compute the violation.
                if (curr_viol > 0 or np.isnan(curr_viol)) and not rough:
                    temp_var = Variable(shape=(self._lifted_n,), name='temp_var')
                    cons = [mat @ temp_var[:self._n] >= lowerbounds]
                    if self.X is not None:
                        con = PrimalProductCone(self.X.A @ temp_var + v[i] * self.X.b, self.X.K)
                        cons.append(con)
                    prob = Problem(CL_MIN, Expression([0]), cons)
                    status, value = prob.solve(verbose=False)
                    if status in {CL_SOLVED, CL_INACCURATE} and abs(value) < 1e-7:
                        curr_viol = 0
                viols.append(curr_viol)
            else:
                viols.append(0)
        viol = max(viols)
        return viol
Example #16
def metric(real, synthetic):
    assert real.shape[1] == 2, "Expected 2d data."
    assert synthetic.shape[1] == 2, "Expected 2d data."
    real = [(x[0], x[1]) for x in real]
    synthetic = [(x[0], x[1]) for x in synthetic]
    f_obs, f_exp = frequencies(real, synthetic)
    value = np.sum(rel_entr(f_obs, f_exp))
    return value, Goal.MINIMIZE, "entropy", (0.0, float("inf"))
def get_kl(pk, qk):
    pk = np.asarray(pk)
    pk = 1.0 * pk / np.sum(pk, axis=0)
    qk = np.asarray(qk)
    qk = 1.0 * qk / np.sum(qk, axis=0)
    vec = rel_entr(pk, qk)
    S = np.sum(vec, axis=0)
    return S
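
# Usage sketch for get_kl() above (illustrative values); both vectors are renormalized
# inside the function, so raw counts work as well as probabilities.
import numpy as np
from scipy.special import rel_entr

print(get_kl([10, 40, 50], [80, 15, 5]))  # KL(P||Q) in nats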
Example #18
def jsd(p_distb, q_distb):  #, base=None):
    """Jensen Shannon Distance

    Args:
        p_distb (array): first vector (discrete distribution)
        q_distb (array): second vector

    Returns:
        Jensen Shannon Distance

    """
    p = np.asarray(p_distb)  #makes almost no difference to leave this out
    q = np.asarray(q_distb)
    m = (p + q) / 2.0
    left = rel_entr(p, m)
    right = rel_entr(q, m)
    js = np.sum(left, axis=0) + np.sum(right, axis=0)
    return np.sqrt(js / 2.0)
Example #19
def np_jensenshannon_divergence(X, Y, base=None):
    """Compute Jensen-Shannon Divergence
    Parameters
    ----------
    X : array-like
        possibly unnormalized distribution.
    Y : array-like
        possibly unnormalized distribution. Must be of same shape as ``X``.
    Returns
    -------
    j : float
    See Also
    --------
    entropy : function
        Computes entropy and K-L divergence
    """
    X, Y = np.atleast_2d(X), np.atleast_2d(Y)
    m = .5 * (X + Y)
    js = np.sum(rel_entr(X, m) + rel_entr(Y, m), axis=1)
    if base is not None:
        js /= np.log(base)
    return .5 * js
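
# Usage sketch for np_jensenshannon_divergence() above; note that, as written, the
# function does not renormalize its inputs, so already-normalized rows are passed here.
import numpy as np
from scipy.special import rel_entr

X = np.array([[0.2, 0.5, 0.3]])
Y = np.array([[0.1, 0.4, 0.5]])
print(np_jensenshannon_divergence(X, Y))  # one divergence value per row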
Example #20
def kldiv_neighbor_dists(data_matrix, query_matrix_batch):
    """Compute values of the KL-divergence for dense vectors.

    :param data_matrix:             data matrix
    :param query_matrix_batch:      query matrix
    :return: a list with one entry per query, each holding one KL-divergence value per data point
    """

    dists_batch = []
    for k in range(len(query_matrix_batch)):
        v = rel_entr(data_matrix, query_matrix_batch[k])
        dists_batch.append(np.sum(v, axis=-1))

    return dists_batch
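
# Usage sketch for kldiv_neighbor_dists() above: rows of both matrices are treated as
# (already normalized) distributions, and each query yields one divergence per data row.
import numpy as np
from scipy.special import rel_entr

data = np.array([[0.2, 0.5, 0.3],
                 [0.6, 0.3, 0.1]])
queries = np.array([[0.1, 0.4, 0.5]])
print(kldiv_neighbor_dists(data, queries))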
    def test_ordinary_sage_primal_2(self):
        n, m = 2, 6
        np.random.seed(0)
        alpha = 1 * np.random.randn(m - 1, n)
        conv_comb = np.random.rand(m - 1)
        conv_comb /= np.sum(conv_comb)
        alpha_last = alpha.T @ conv_comb
        alpha = np.row_stack([alpha, alpha_last])
        c0 = np.array([1, 2, 3, 4, -0.5, -0.1])
        c = Variable(shape=(m, ), name='projected_c0')
        t = Variable(shape=(1, ), name='epigraph_var')
        sage_constraint = sage_cones.PrimalSageCone(c,
                                                    alpha,
                                                    X=None,
                                                    name='test')
        epi_constraint = vector2norm(c - c0) <= t
        constraints = [sage_constraint, epi_constraint]
        prob = Problem(CL_MIN, t, constraints)
        prob.solve(solver='ECOS')

        # constraint violations
        v0 = sage_constraint.violation(norm_ord=1, rough=False)
        assert v0 < 1e-6
        v1 = sage_constraint.violation(norm_ord=np.inf, rough=True)
        assert v1 < 1e-6

        # certificates
        w4 = sage_constraint.age_witnesses[4].value
        c4 = sage_constraint.age_vectors[4].value
        drop4 = np.array([True, True, True, True, False, True])
        level4 = np.sum(rel_entr(w4[drop4], np.exp(1) * c4[drop4])) - c4[4]
        assert level4 < 1e-6
        w5 = sage_constraint.age_witnesses[5].value
        c5 = sage_constraint.age_vectors[5].value
        drop5 = np.array([True, True, True, True, True, False])
        level5 = np.sum(rel_entr(w5[drop5], np.exp(1) * c5[drop5])) - c5[5]
        assert level5 < 1e-6
Example #22
def scipy_entropy(pk, qk=None, base=None):
    pk = np.asarray(pk)
    pk = 1.0 * pk / np.sum(pk, axis=0)
    if qk is None:
        vec = special.entr(pk)
    else:
        qk = np.asarray(qk)
        if len(qk) != len(pk):
            raise ValueError("qk and pk must have same length.")
        qk = 1.0 * qk / np.sum(qk, axis=0)
        vec = special.rel_entr(pk, qk)
    S = np.sum(vec, axis=0)
    if base is not None:
        S /= np.log(base)
    return S
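
# Usage sketch for scipy_entropy() above, which mirrors scipy.stats.entropy; the two
# calls below should print the same base-2 value.
import numpy as np
from scipy import special, stats

pk = [0.10, 0.40, 0.50]
qk = [0.80, 0.15, 0.05]
print(scipy_entropy(pk, qk, base=2))
print(stats.entropy(pk, qk, base=2))  # reference value from SciPy itself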
Example #23
def compute_kl_div_neighbors(data_matrix, query_matrix, K):
    """Compute neighbors for the KL-divergence. By default,
       in NMSLIB, queries are left, i.e., the data object is
       the first (left) argument.

    :param data_matrix:      data matrix
    :param query_matrix:     query matrix
    :param K:                the number of neighbors
    :return: an output in the shape <#of queries> X min(K, <# of data points>)
    """
    dists = []
    for i in range(len(query_matrix)):
        v = rel_entr(data_matrix, query_matrix[i])
        dists.append(np.sum(v, axis=-1))

    return get_neighbors_from_dists(np.stack(dists, axis=0), K)
def kl(hists, p):
    # p: index of chosen image
    # h: index of most similar image to chosen image
    most_similar = {
        'KL': float('inf'),  # initialize KL
        'P': p,
        'Q1': 0,
        'Q2': 0
    }
    for h in range(len(hists)):
        if h == p:
            continue

        kl_now = sum(rel_entr(hists[p], hists[h]))
        if kl_now < most_similar['KL']:
            most_similar['KL'] = kl_now
            most_similar['Q2'] = most_similar['Q1']
            most_similar['Q1'] = h

    return most_similar
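
# Usage sketch for kl() above: hists is a list of (already normalized) histograms and p
# indexes the one to compare against the others; the numbers are invented.
import numpy as np
from scipy.special import rel_entr

hists = [np.array([0.20, 0.50, 0.30]),
         np.array([0.25, 0.45, 0.30]),
         np.array([0.70, 0.20, 0.10])]
print(kl(hists, p=0))  # 'Q1' should point at the most KL-similar histogram (index 1)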
Example #25
    def test_relent_1(self):
        # compilation and evaluation
        x = Variable(shape=(2, ), name='x')
        y = Variable(shape=(2, ), name='y')
        re = relent(2 * x, np.exp(1) * y)
        con = [re <= 10, 3 <= x, x <= 5]
        # compilation
        A, b, K, _, _, _ = compile_constrained_system(con)
        A_expect = np.array([
            [0., 0., 0., 0., -1.,
             -1.],  # linear inequality on epigraph for relent constr
            [1., 0., 0., 0., 0., 0.],  # bound constraints on x
            [0., 1., 0., 0., 0., 0.],  #
            [-1., 0., 0., 0., 0., 0.],  # more bound constraints on x
            [0., -1., 0., 0., 0., 0.],  #
            [0., 0., 0., 0., -1., 0.],  # first exponential cone
            [0., 0., 2.72, 0., 0., 0.],  #
            [2., 0., 0., 0., 0., 0.],  #
            [0., 0., 0., 0., 0., -1.],  # second exponential cone
            [0., 0., 0., 2.72, 0., 0.],  #
            [0., 2., 0., 0., 0., 0.]
        ])  #
        A = np.round(A.toarray(), decimals=2)
        assert np.all(A == A_expect)
        assert np.all(
            b == np.array([10., -3., -3., 5., 5., 0., 0., 0., 0., 0., 0.]))
        assert K == [
            Cone('+', 1),
            Cone('+', 2),
            Cone('+', 2),
            Cone('e', 3),
            Cone('e', 3)
        ]
        # value propagation
        x0 = np.array([1, 2])
        x.value = x0
        y0 = np.array([3, 4])
        y.value = y0
        actual = re.value
        expect = np.sum(rel_entr(2 * x0, np.exp(1) * y0))
        assert abs(actual - expect) < 1e-7
Example #26
    def run_transform_operations(x: np.ndarray, y: np.ndarray) -> Dict:
        """Compute a set of pairwise transforms of two numeric arrays.

        :param x: first numeric array (or pandas Series)
        :param y: second numeric array (or pandas Series), same length as x
        :return: dict with percentage difference, cross-correlation, convolution,
                 marginal density, and elementwise relative entropy
        """

        if isinstance(x, pd.Series):
            x = x.values

        if isinstance(y, pd.Series):
            y = y.values

        p_diff = percentage_difference(x, y)
        cross_corr = signal.correlate(x, y)
        conv_x_y = signal.convolve(x, y)
        relative_entropy = rel_entr(x, y)

        if any(np.isnan(x)):
            x[np.isnan(x)] = np.nanmedian(x)

        if any(np.isnan(y)):
            y[np.isnan(y)] = np.nanmedian(y)

        xy_density, _, _ = np.histogram2d(x, y, density=True)
        marginal_density = np.apply_along_axis(np.nanmean,
                                               axis=1,
                                               arr=xy_density)

        xy_transformed = \
            dict(pdiff=p_diff,
                 ccorr=cross_corr,
                 conv=conv_x_y,
                 density=marginal_density,
                 entropy=relative_entropy)

        return xy_transformed
    def kl_divergence(self):
        """This metric is also defined at the variable level: it examines whether the
        distributions of the attributes in the original and synthetic data are identical
        and measures the level of discrepancy between them.
        The threshold limit for this metric is a value below 2."""

        target_columns = list(self.origdst.columns[11:-3])
        target_columns.append(self.origdst.columns[1])  # channel
        target_columns.append(self.origdst.columns[2])  # program_title
        target_columns.append(self.origdst.columns[3])  # genre

        kl_dict = {}

        for col in target_columns:

            try:

                col_counts_orig = self.origdst[col].value_counts(
                    normalize=True).sort_index(ascending=True)
                col_counts_synth = self.synthdst[col].value_counts(
                    normalize=True).sort_index(ascending=True)

                kl = sum(
                    rel_entr(col_counts_orig.tolist(),
                             col_counts_synth.tolist()))

                kl_dict[col] = kl

            except:

                print(
                    'For the column ', col,
                    ' you must generate the same unique values as the real dataset.'
                )
                print(
                    'The number of unique values than you should generate for column ',
                    col, 'is ', len(self.origdst[col].unique()))

        return kl_dict
def KL_divergence(data_i=0):
    """
    Relative entropy between

    P: true distribution = "motif.txt"
        and
    Q: model predicted/approximate distribution = "predictedmotif.txt"

    Output:
        D_KL(P||Q)
    """
    motif = import_motif('' + fileprefix + 'results/dataset' + str(data_i) +
                         '/motif.txt')  # list of lists
    predictedmotif = import_motif('' + fileprefix + 'results/dataset' +
                                  str(data_i) + '/predictedmotif.txt')

    rel_ent = 0

    for i in range(len(motif[1:-1])):
        # compare each row (ACGT) against each other in two matrices
        # row_diff = sum((motif[i][j] * log(motif[i][j]/predictedmotif[i][j])  for j in range(len(motif[i]))))
        row_diff = sum(rel_entr(motif[i], predictedmotif[i]))
        rel_ent += row_diff
    return rel_ent
Example #29
def get_KLD(data,probe_state,trial_num):
    probe_rep = state_reps[probe_state]

    KLD_array = np.zeros(env.shape)
    KLD_array[:] = np.nan
    entropy_array = np.zeros(env.shape)
    entropy_array[:] = np.nan
    ec_pol_grid = np.zeros((*env.shape,4))#np.zeros(env.shape, dtype=[(x, 'f8') for x in env.action_list])

    blank_mem = Memory(cache_limit=400, entry_size=4)
    blank_mem.cache_list = data['ec_dicts'][trial_num]
    probe_pol = blank_mem.recall_mem(probe_rep)

    #for k in state_reps.keys():
        #sr_rep = state_reps[k]
    for sr_rep in blank_mem.cache_list.keys():
        k = blank_mem.cache_list[sr_rep][2]
        pol = blank_mem.recall_mem(sr_rep)
        twoD = env.oneD2twoD(k)
        KLD_array[twoD] = sum(rel_entr(list(probe_pol),list(pol)))
        ec_pol_grid[twoD][:] = pol
        entropy_array[twoD] = entropy(pol,base=2)

    return KLD_array,ec_pol_grid,entropy_array
# example of calculating the kl divergence (relative entropy) with scipy
from scipy.special import rel_entr
# define distributions
p = [0.10, 0.40, 0.50]
q = [0.80, 0.15, 0.05]
# calculate (P || Q)
kl_pq = rel_entr(p, q)
print('KL(P || Q): %.3f nats' % sum(kl_pq))
# calculate (Q || P)
kl_qp = rel_entr(q, p)
print('KL(Q || P): %.3f nats' % sum(kl_qp))
def JSD(P, Q):
    M = 0.5 * (P + Q)
    return 0.5 * (sum(special.rel_entr(P, M)) + sum(special.rel_entr(Q, M)))
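
# Usage sketch for JSD() above, reusing the two distributions from the standalone scipy
# example a few lines up (converted to numpy arrays, and assuming `special` is imported
# from scipy).
import numpy as np
from scipy import special

P = np.array([0.10, 0.40, 0.50])
Q = np.array([0.80, 0.15, 0.05])
print('JS(P || Q): %.3f nats' % JSD(P, Q))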