def get_doc_vector(self, doc_index=None, doc_name=None, n_dim=None):
    # Fold one document column into the reduced SVD space (LSA fold-in).
    # The document is selected either by doc_index or by doc_name;
    # `transpose` and `matmul` are assumed to be available (e.g. from numpy).
    if n_dim is None:
        n_dim = self.dimension
    # Look up the term-document column for the requested document.
    if doc_name is None:
        td_column = self._get_doc_column(doc_index)
    elif doc_index is None:
        td_column = self._get_doc_column(self.ldocs.index(doc_name))
    # Use the full-dimensional factors or the reduced plotting factors.
    if n_dim == self.dimension:
        sigma_inverse = [1 / s for s in self.sigma]
        ut = transpose(self.u)
    else:
        sigma_inverse = [1 / s for s in self.plot_sigma]
        ut = transpose(self.plot_u)
    # doc_vector = Sigma^-1 * U^T * d
    ut_d = matmul(ut, td_column)
    doc_vector = [sigma_inverse[i] * ut_d[i] for i in range(n_dim)]
    return doc_vector
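
The method above implements the standard LSA fold-in, doc_vector = Sigma^-1 * U^T * d. A minimal, self-contained sketch of the same step with plain NumPy (the toy matrix and names below are illustrative, not taken from the original class):

import numpy as np

term_doc = np.random.rand(6, 4)                # toy term-document matrix (illustrative)
u, sigma, vt = np.linalg.svd(term_doc, full_matrices=False)
d = term_doc[:, 0]                             # column of one document
k = 2                                          # reduced dimensionality
doc_vector = (u[:, :k].T @ d) / sigma[:k]      # Sigma^-1 * U^T * d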
Example #2
import numpy as np
import numpy.matlib as npm
from scipy.special import betainc


def ccprmod(supports, idx_correct_label, B=20):
    """Python implementation of the ccprmod.m (Classifier competence based on probabilistic modelling)
    function. Matlab code is available at:
    http://www.mathworks.com/matlabcentral/mlc-downloads/downloads/submissions/28391/versions/6/previews/ccprmod.m/index.html

    Parameters
    ----------
    supports: array of shape = [n_samples, n_classes]
              containing the supports obtained by the base classifier for each class.

    idx_correct_label: array of shape = [n_samples]
                       containing the index of the correct class.

    B : int (Default = 20)
        Number of points used in the calculation of the competence; higher values
        result in a more accurate estimate.

    Returns
    -------
    C_src : array of shape = [n_samples]
            representing the classifier competences at each data point

    Examples
    --------
    >>> supports = [[0.3, 0.6, 0.1],[1.0/3, 1.0/3, 1.0/3]]
    >>> idx_correct_label = [1,0]
    >>> ccprmod(supports,idx_correct_label)
    ans = [0.784953394056843, 0.332872292262951]

    References
    ----------
    T. Woloszynski, M. Kurzynski, A probabilistic model of classifier competence for
    dynamic ensemble selection, Pattern Recognition 44 (2011) 2656–2668.
    """
    if not isinstance(B, int):
        raise TypeError(
            'Parameter B should be an integer. Currently B is {0}'.format(
                type(B)))

    if B is None or B <= 0:
        raise ValueError(
            'The parameter B should be higher than 0. Currently B is {0}'.
            format(B))

    supports = np.asarray(supports)
    idx_correct_label = np.array(idx_correct_label)
    supports[supports > 1] = 1

    N, C = supports.shape

    x = np.linspace(0, 1, B)
    x = npm.repmat(x, N, C)

    a = npm.zeros(x.shape)

    for c in range(C):
        a[:, c * B:(c + 1) * B] = C * supports[:, c:c + 1]

    b = C - a

    # For extreme cases, with a or b equal to 0, add a small constant:
    eps = 1e-20
    a[a == 0] = eps
    b[b == 0] = eps
    betaincj = betainc(a, b, x)

    C_src = np.zeros(N)
    # Competence of sample n: probability that the Beta-modelled support of the
    # correct class exceeds the supports of the other classes, estimated by
    # numerical integration over the B grid points.
    for n in range(N):
        t = range((idx_correct_label[n]) * B, (idx_correct_label[n] + 1) * B)
        bc = betaincj[n, t]
        bi = betaincj[n, list(set(range(0, (C * B))) - set(t))]
        bi = npm.transpose(npm.reshape(bi, (B, C - 1), order='F'))
        C_src[n] = np.sum(
            np.multiply((bc[0, 1:] - bc[0, 0:-1]),
                        np.prod((bi[:, 0:-1] + bi[:, 1:]) / 2, 0)))

    return C_src
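
Inside the function, each support s_c is treated as a Beta-distributed variable with parameters a = C * s_c and b = C - a, evaluated on B grid points. A small illustrative sketch of those quantities for the first sample of the docstring example (recomputed here, not part of the original code):

import numpy as np
from scipy.special import betainc

supports = np.array([0.3, 0.6, 0.1])   # one sample, C = 3 classes
C, B = supports.size, 20
a = C * supports                        # Beta 'a' parameter per class
b = C - a                               # Beta 'b' parameter per class
x = np.linspace(0, 1, B)                # integration grid
cdf_correct = betainc(a[1], b[1], x)    # CDF of the correct class (index 1)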
Example #3
def _find(mat):
    # Return the first coordinate of every nonzero entry of `mat`
    # (assumes numpy is imported as `nm`).
    return [p[0] for p in nm.transpose(nm.nonzero(mat)).tolist()]
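
For a 1-D mask this simply yields the indices of the nonzero entries; a tiny self-contained check (assuming `nm` is NumPy imported under that alias):

import numpy as nm

mask = nm.array([0, 1, 0, 1, 1])
print([p[0] for p in nm.transpose(nm.nonzero(mask)).tolist()])  # [1, 3, 4]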
Example #4
#_________Label Data normalization______# not useful______________________________________________________

maxx2 = np.max(output_mat[:,0])
output_mat[:,0] = output_mat[:,0]/maxx2
print("\n Labels Data normalization done")
#_________________________________________________________________________________________________________
"""

#_________Important Features Extraction___________________________________________________________________

ip_mean = npmat.mean(input_mat, 0)  # column means
ip_mean_mat = npmat.repmat(ip_mean, new_row, 1)  # tile the means into a matrix
ip_mean_sub = input_mat - ip_mean_mat  # subtract the column means

ip_mat_cvar = (npmat.transpose(ip_mean_sub) * ip_mean_sub) / my_row  # covariance matrix
dg = np.diagonal(ip_mat_cvar)  # variances
dg = np.sqrt(dg)  # standard deviations
dg = npmat.matrix(dg)  # convert back to matrix form
scaled_cov = np.transpose(dg) * dg  # outer product of std devs used for scaling
ip_mat_corel = np.divide(ip_mat_cvar, scaled_cov)  # correlation matrix of the inputs

#plt.matshow(ip_mat_corel)
#plt.show()

corel_thresh = 1.00  # correlation threshold
unimp_feat = npmat.zeros((new_row, 1))  # initialize unimportant-features matrix
unimp_count = 0
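
Assuming `input_mat` holds one sample per row, the same correlation matrix can be obtained directly with NumPy; a hedged sketch with stand-in data (names below are illustrative, not from the original script):

import numpy as np

data = np.random.rand(100, 5)            # stand-in for input_mat (samples x features)
corr = np.corrcoef(data, rowvar=False)   # feature-by-feature correlation matrix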
Example #5
        def _find(mat):
            return [p[0] for p in nm.transpose(nm.nonzero(mat)).tolist()]

        open_positions = _find(self.board.state == BLANK)