예제 #1
0
def one_trial(i, n_samples, algsel, problem, n_select, hsic_e, params):
    """Run a single selection-and-test trial and score it.

    Draws ``n_samples`` observations from ``problem`` (``i`` is passed to
    ``problem.sample`` and reused as the test seed), builds an estimator
    from ``hsic_e`` using Gaussian kernels on both samples, wraps it with
    ``algsel`` and runs its ``test`` method asking for ``n_select`` features.

    :param i: trial index, forwarded to ``problem.sample`` and as ``seed``
    :param n_samples: number of samples to draw; also passed to ``params``
    :param algsel: callable building the selection procedure from an estimator
    :param problem: object providing ``sample`` and ``is_true``
    :param n_select: number of features requested from the procedure
    :param hsic_e: callable building the estimator from two kernels and
        the value returned by ``params(n_samples)``
    :param params: callable evaluated as ``params(n_samples)``
    :return: tuple ``(tpr, fpr)``; both are 0 when at most one feature
        was selected
    """
    p, r = problem.sample(n_samples, i)

    # Kernel bandwidths: squared value returned by util.meddistance.
    bw_p = util.meddistance(p, subsample=1000)**2
    bw_r = util.meddistance(r, subsample=1000)**2
    estimator = hsic_e(kernel.KGauss(bw_p), kernel.KGauss(bw_r),
                       params(n_samples))

    selector = algsel(estimator, params=True)
    outcome = selector.test(p, r, args=n_select, seed=i)

    if outcome['sel_vars'].shape[0] <= 1:
        # Zero or one selected feature: the trial is reported as (0, 0).
        tpr, fpr = 0, 0
    else:
        is_true = problem.is_true(outcome['sel_vars'])
        n_true = np.sum(is_true)
        # Rejections among selected features flagged false by the problem,
        # normalised by the number of such features (at least 1).
        false_rejections = np.sum(outcome['h0_rejs'][np.logical_not(is_true)])
        fpr = false_rejections / max(n_select - n_true, 1)
        # Rejections among selected features flagged true by the problem.
        true_rejections = np.sum(outcome['h0_rejs'][is_true])
        tpr = true_rejections / max(n_true, 1)

    logging.debug("TPR is :{0:.3f}  FPR is :{1:.3f}".format(tpr, fpr))
    return tpr, fpr
예제 #2
0
def estimate_M_unbiased(X):
    """Estimate the matrix M column-pair by column-pair with ``hsic.HSIC_U``.

    Every pair of columns ``(i, j)`` with ``j <= i`` is passed to the
    estimator's ``compute`` method; the value is mirrored across the
    diagonal so the resulting matrix is symmetric.

    :param X: 2-D data array; its second dimension gives the size of M
    :return: tuple ``(M_true, M)`` where ``M_true`` is the raw symmetric
        estimate and ``M`` is the result of ``nearestPD(M_true)``
    """
    n_cols = X.shape[1]
    # The same Gaussian kernel is used for both arguments of the estimator.
    bandwidth = util.meddistance(X, subsample=1000)**2
    gauss = kernel.KGauss(bandwidth)
    estimator = hsic.HSIC_U(gauss, gauss)

    M_true = np.zeros((n_cols, n_cols))
    lower_triangle = ((row, col)
                      for row in range(n_cols)
                      for col in range(row + 1))
    for row, col in lower_triangle:
        # Columns are passed as (n, 1) arrays.
        M_true[row, col] = estimator.compute(X[:, row, np.newaxis],
                                             X[:, col, np.newaxis])
        M_true[col, row] = M_true[row, col]  # mirror across the diagonal

    M = nearestPD(M_true)  # positive definite approximation
    return M_true, M
예제 #3
0
def estimate_M(X, estimator, B, ratio):
    """Estimate the matrix M with a block or incomplete U-statistics estimator.

    For every pair of columns ``(i, j)`` with ``j <= i`` the entry is the
    mean of the values returned by the estimator's ``estimates`` method;
    the entry is mirrored so the matrix is symmetric.

    :param X: 2-D data array; its second dimension gives the size of M
    :param estimator: ``'inc'`` selects ``hsic.HSIC_Inc``; any other value
        selects ``hsic.HSIC_Block``
    :param B: Block size (used by the block estimator only)
    :param ratio: size of incomplete U-statistics estimator (used by the
        incomplete estimator only)
    :return: tuple ``(M_true, M)`` where ``M_true`` is the raw symmetric
        estimate and ``M`` is the result of ``nearestPD(M_true)``
    """
    n_cols = X.shape[1]
    bandwidth = util.meddistance(X, subsample=1000)**2
    gauss = kernel.KGauss(bandwidth)
    hsic_M = (hsic.HSIC_Inc(gauss, gauss, ratio=ratio)
              if estimator == 'inc'
              else hsic.HSIC_Block(gauss, gauss, bsize=B))

    M_true = np.zeros((n_cols, n_cols))
    lower_triangle = ((row, col)
                      for row in range(n_cols)
                      for col in range(row + 1))
    for row, col in lower_triangle:
        pair_estimates = hsic_M.estimates(X[:, row, np.newaxis],
                                          X[:, col, np.newaxis])
        M_true[row, col] = np.mean(pair_estimates)
        M_true[col, row] = M_true[row, col]  # mirror across the diagonal

    M = nearestPD(M_true)  # positive definite approximation
    return M_true, M
예제 #4
0
def estimate_M_unbiased_parallel(X, n_jobs=-1):
    """Parallelised estimation of M with ``hsic.HSIC_U``.

    Each pair of columns ``(i, j)`` with ``j <= i`` is evaluated as a
    separate job; the results are written into a symmetric matrix.

    :param X: 2-D data array; its second dimension gives the size of M
    :param n_jobs: value forwarded to ``Parallel(n_jobs=...)``; defaults
        to -1, the value that was previously hard-coded
    :return: tuple ``(M_true, M)`` where ``M_true`` is the raw symmetric
        estimate and ``M`` is the result of ``nearestPD(M_true)``
    """
    p = X.shape[1]
    x_bw = util.meddistance(X, subsample=1000)**2
    kx = kernel.KGauss(x_bw)
    hsic_M = hsic.HSIC_U(kx, kx)
    M_true = np.zeros((p, p))

    def one_calc(i, j):
        # Columns are passed as (n, 1) arrays.
        return hsic_M.compute(X[:, i, np.newaxis], X[:, j, np.newaxis])

    # Lower-triangle index pairs, in the order the jobs are submitted.
    pairs = [(i, j) for i in range(p) for j in range(i + 1)]
    par = Parallel(n_jobs=n_jobs)
    res = par(delayed(one_calc)(i, j) for i, j in pairs)

    # Results are consumed in submission order, so pairing them with
    # ``pairs`` replaces manual offset bookkeeping into the flat list.
    for (i, j), value in zip(pairs, res):
        M_true[i, j] = M_true[j, i] = value

    M = nearestPD(M_true)  # positive definite approximation
    return M_true, M
예제 #5
0
def one_trial(i, n_samples, algsel, problem, n_select, mmd_e):
    """Run a single selection-and-test trial and score it.

    Draws ``n_samples`` observations from ``problem`` (``i`` is passed to
    ``problem.sample`` and reused as the test seed), builds an estimator
    from ``mmd_e`` with one Gaussian kernel whose bandwidth comes from the
    stacked samples, wraps it with ``algsel`` and runs its ``test`` method
    asking for ``n_select`` features.

    :param i: trial index, forwarded to ``problem.sample`` and as ``seed``
    :param n_samples: number of samples to draw
    :param algsel: callable building the selection procedure from an estimator
    :param problem: object providing ``sample`` and ``is_true``
    :param n_select: number of features requested from the procedure
    :param mmd_e: callable building the estimator from a kernel
    :return: tuple ``(tpr, fpr)``; both are 0 when at most one feature
        was selected
    """
    p, r = problem.sample(n_samples, i)

    # One shared bandwidth computed on both samples stacked row-wise.
    stacked = np.vstack((p, r))
    bandwidth = util.meddistance(stacked, subsample=1000)**2
    estimator = mmd_e(kernel.KGauss(bandwidth))

    selector = algsel(estimator)
    outcome = selector.test(p, r, args=n_select, seed=i)

    if outcome['sel_vars'].shape[0] <= 1:
        # Zero or one selected feature: the trial is reported as (0, 0).
        tpr, fpr = 0, 0
    else:
        is_true = problem.is_true(outcome['sel_vars'])
        n_true = np.sum(is_true)
        # Rejections among selected features flagged false by the problem,
        # normalised by the number of such features (at least 1).
        false_rejections = np.sum(outcome['h0_rejs'][np.logical_not(is_true)])
        fpr = false_rejections / max(n_select - n_true, 1)
        # Rejections among selected features flagged true by the problem.
        true_rejections = np.sum(outcome['h0_rejs'][is_true])
        tpr = true_rejections / max(n_true, 1)

    logging.debug("TPR is :{0:.3f}  FPR is :{1:.3f}".format(tpr, fpr))
    return tpr, fpr
예제 #6
0
    def sel_inf(self,
                X,
                Y,
                inf_type,
                alpha,
                niv,
                H0=None,
                M0=None,
                i=None,
                unbiased_parallel=False,
                n_jobs=20):
        """Select features and test them, returning a ``sim.Inference_Result``.

        :param X, Y: covariate and response data; ``Y`` is passed on as
            ``Y[:, np.newaxis]``
        :param inf_type: must be ``'test'`` (enforced with an assertion)
        :param alpha: level forwarded to the selection procedure's ``test``
        :param niv: number of important variables, used for reporting results;
            indicator entries from index ``niv`` onwards are zeroed in the
            "true rejection" indicator
        :return: ``sim.Inference_Result`` holding selection / rejection
            indicators and p-values (-1 where no p-value is reported)

        H0, M0, i, unbiased_parallel and n_jobs are accepted but not used.
        """
        assert inf_type == 'test'
        n_features = X.shape[1]

        # Kernel on the covariates.
        x_bw = util.meddistance(X, subsample=1000)**2
        kernel_x = kernel.KGauss(x_bw)

        # Kernel on the response: built from value frequencies for discrete
        # output, Gaussian otherwise.
        if self.discrete_output:
            values, counts = np.unique(Y, return_counts=True)
            frequencies = dict(zip(values, counts))
            kernel_y = KDiscrete(frequencies, frequencies)
        else:
            y_bw = util.meddistance(Y[:, np.newaxis], subsample=1000)**2
            kernel_y = kernel.KGauss(y_bw)

        # Any estimator name other than 'inc' uses the block estimator.
        if self.estimator == 'inc':
            hsic_H = hsic.HSIC_Inc(kernel_x, kernel_y, ratio=self.l)
        else:
            hsic_H = hsic.HSIC_Block(kernel_x, kernel_y, bsize=self.B)

        selector_cls = PolySel if self.poly else MultiSel
        feat_select = selector_cls(hsic_H)

        y_column = Y[:, np.newaxis]

        # When only the first feature is evaluated, the test is run only if
        # feature 0 is among the n_select largest estimates.
        run_test = True
        if self.only_evaluate_first:
            estimates = hsic_H.compute(X, y_column)
            top_indices = np.argpartition(estimates, -self.n_select,
                                          axis=0)[-self.n_select:]
            run_test = 0 in top_indices

        if run_test:
            results = feat_select.test(X,
                                       y_column,
                                       args=self.n_select,
                                       alpha=alpha)
            sel_vars = results['sel_vars']
            h0_rejs = results['h0_rejs']
        else:
            # fake values: the last n_select features count as selected and
            # the last of them is marked as rejected
            sel_vars = np.arange(n_features - self.n_select, n_features)
            h0_rejs = np.array([self.n_select - 1])

        # Reporting: 0/1 indicator vectors of length n_features.
        selected = np.zeros(n_features)
        selected[sel_vars] = 1

        rejected = np.zeros(n_features)
        rejected[sel_vars[h0_rejs]] = 1

        # Selected features restricted to the first niv positions.
        selected_important = np.zeros(n_features)
        selected_important[sel_vars] = 1
        selected_important[niv:] = 0

        # p-values not provided for Poly
        # p-values not of interest for evaluation of empirical power
        p_values = -np.ones(n_features)
        if not (self.poly or self.only_evaluate_first):
            p_values[sel_vars] = results['pvals']

        return sim.Inference_Result(n_features, None, selected,
                                    {'H': rejected},
                                    {'H': selected_important},
                                    {'H': p_values}, None)