Python siegelslopesの例、scipy.stats.siegelslopes Pythonの例

コード例 #1

0

ファイルを表示

ファイル: resit_fit.py プロジェクト: kimpenn/resistant-fit-norm-bibe-2020

def linear_resist_fit_robust(x, y, p=0.5):
    """Fit a line to (x, y) by repeatedly refitting on the best-fitting points.

    Every iteration fits a line with ``stats.siegelslopes`` on the current
    subset, scores *all* points by their squared residual against that line,
    and keeps the ``int(n * p) + 1`` points with the smallest residuals as the
    subset for the next fit.  Iteration stops once the summed squared residual
    of the kept points changes by less than 1e-5, or after 3000 iterations.

    Parameters
    ----------
    x, y : numpy.ndarray
        Predictor and response values.  They are indexed with an integer index
        array and ``x.shape[0]`` is taken as the number of points
        (presumably 1-D arrays of equal length -- confirm against callers).
    p : float, optional
        Fraction of the points kept for the next fit (default 0.5).

    Returns
    -------
    numpy.ndarray
        ``slope * x + intercept`` evaluated on the full ``x`` using the line
        from the last fit performed.

    Notes
    -----
    Calls ``np.seterr(all='raise')``, which changes NumPy's floating-point
    error handling globally and is not restored on return.  Progress is
    printed to stdout on every iteration.
    """
    np.seterr(all='raise')
    n_keep = int(x.shape[0] * p) + 1

    x_reg, y_reg = x, y
    loss_pre = float('Inf')
    for i in range(3000):
        # siegelslopes takes the dependent variable first: siegelslopes(y, x).
        slope, intercept = stats.siegelslopes(y_reg, x_reg)
        abline_values = slope * x + intercept
        square_list = np.square(y - abline_values)

        # Indices of the n_keep points with the smallest squared residuals.
        sub_index = np.argsort(square_list)[:n_keep]
        loss = np.sum(square_list[sub_index])

        delta_loss = abs(loss_pre - loss)
        if delta_loss < 0.00001:
            print('convergence')
            break
        loss_pre = loss

        # Refit on the retained points in the next iteration.
        x_reg = x[sub_index]
        y_reg = y[sub_index]
        print(i, loss, delta_loss)

    return abline_values

コード例 #2

0

ファイルを表示

def linear_fit(tt, xx, yy, eyy, method='ls'):
    """Fit a straight line to ``log10(yy)`` versus ``log10(xx)``.

    The line ``log10(y) = a * log10(x) + b`` is estimated with the selected
    method and converted to power-law parameters ``[a, 10**b]``.

    Parameters
    ----------
    tt : array_like
        Points at which the fitted model is evaluated; passed as the first
        argument to ``pow_law_func``.
    xx, yy : array_like
        Data values; ``log10`` is taken of both.
    eyy : array_like
        Uncertainties on ``yy``; converted to uncertainties on ``log10(yy)``.
    method : str, optional
        One of ``'ls'`` (``curve_fit`` with ``lin_func``), ``'bces'``,
        ``'siegel_h'``, ``'siegel_s'``, ``'theil_sen'`` or ``'rlm'``.

    Returns
    -------
    par : list
        ``[a, 10**b]``.
    per : list
        ``[ea, 10**b * ln(10) * eb]``: the error on ``a`` and the error on
        ``b`` propagated to ``10**b``.  The Siegel methods report zero for
        both; Theil-Sen reports only a slope error.
    fit : object
        Result of ``pow_law_func(tt, par[0], par[1])``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    log_x = np.log10(xx)
    log_y = np.log10(yy)
    # Error propagation: d(log10 y) = dy / (y * ln 10).
    log_e_y = eyy / yy / np.log(10.)

    if method == 'ls':
        # NOTE(review): assumes lin_func(x, a, b) takes the slope first and the
        # intercept second -- confirm against its definition.
        popt, pcov = curve_fit(lin_func, log_x, log_y, sigma=log_e_y)
        a, b = popt
        ea, eb = np.sqrt(np.diag(pcov))

    elif method == 'bces':
        # BCES needs x errors; a negligible value (scaled by 1e-20) is supplied.
        log_e_x = eyy * 1.e-20 / np.log(10.) / yy
        cov = np.zeros_like(log_x)
        a_bces, b_bces, aerr_bces, berr_bces, _ = bces.bces.bces(
            log_x, log_e_x, log_y, log_e_y, cov)
        # Element 3 of each BCES output is the one used here.
        a = a_bces[3]
        ea = aerr_bces[3]
        b = b_bces[3]
        eb = berr_bces[3]

    elif method == 'siegel_h':
        a, b = stats.siegelslopes(log_y, log_x)
        ea, eb = 0, 0

    elif method == 'siegel_s':
        a, b = stats.siegelslopes(log_y, log_x, method='separate')
        ea, eb = 0, 0

    elif method == 'theil_sen':
        # theilslopes returns (slope, intercept, low_slope, high_slope).
        a, b, am, _ = stats.theilslopes(log_y, log_x, 0.68)
        # a - am is the lower uncertainty of the *slope*, so it is the error
        # on a; no intercept error is available from this estimator.
        ea, eb = a - am, 0

    elif method == 'rlm':
        # add_constant prepends the constant column, so params is
        # (intercept, slope).
        log_X = sm.add_constant(log_x)
        resrlm = sm.RLM(log_y, log_X).fit()
        b, a = resrlm.params
        eb, ea = resrlm.bse

    else:
        raise ValueError(
            "unknown method %r; expected one of 'ls', 'bces', 'siegel_h', "
            "'siegel_s', 'theil_sen', 'rlm'" % (method,))

    par = [a, 10**b]
    # d(10**b) = 10**b * ln(10) * db
    per = [ea, 10.**b * np.log(10) * eb]
    fit = pow_law_func(tt, par[0], par[1])
    return par, per, fit

コード例 #3

0

ファイルを表示

def timing_parameters(geom, image, peak_time, hillas_parameters, cleaning_mask=None):
    """
    Compute the time-gradient parameters of a (cleaned) camera image.

    The pixel peak times are fitted with a straight line as a function of
    the pixel coordinate along the shower axis returned by
    ``camera_to_shower_coordinates``.

    Parameters
    ----------
    geom: ctapipe.instrument.CameraGeometry
        Camera geometry
    image : array_like
        Pixel values
    peak_time : array_like
        Time of the pulse extracted from each pixel's waveform
    hillas_parameters: ctapipe.containers.HillasParametersContainer
        Result of hillas_parameters
    cleaning_mask: optional, array, dtype=bool
        The pixels that survived cleaning, e.g. tailcuts_clean.
        When given, ``geom``, ``image`` and ``peak_time`` are restricted to
        it.  The remaining pixels must have a non-negative signal.

    Returns
    -------
    timing_parameters: TimingParametersContainer
        Slope and intercept from the robust fit, their errors from the
        weighted least-squares fit, and the RMS deviation of the peak times
        from the robust line.

    Raises
    ------
    ValueError
        If any of the retained pixels has a negative value.
    """

    # The length unit is read before the geometry is (possibly) masked.
    unit = geom.pix_x.unit

    if cleaning_mask is not None:
        peak_time = peak_time[cleaning_mask]
        geom = geom[cleaning_mask]
        image = image[cleaning_mask]

    if np.any(image < 0):
        raise ValueError("The non-masked pixels must verify signal >= 0")

    # Strip the units, expressing every length in the camera's pixel unit.
    pix_x, pix_y, cog_x, cog_y, _length, _width = all_to_value(
        geom.pix_x,
        geom.pix_y,
        hillas_parameters.x,
        hillas_parameters.y,
        hillas_parameters.length,
        hillas_parameters.width,
        unit=unit,
    )

    psi_rad = hillas_parameters.psi.to_value(u.rad)
    longi, _ = camera_to_shower_coordinates(pix_x, pix_y, cog_x, cog_y, psi_rad)

    # The weighted least-squares fit is only used for its covariance matrix,
    # i.e. for the parameter errors; its coefficients are discarded.
    weights = np.sqrt(image)
    _, cov = np.polyfit(longi, peak_time, deg=1, w=weights, cov=True)
    slope_err, intercept_err = np.sqrt(np.diag(cov))

    # The reported line comes from an outlier-robust estimator.
    slope, intercept = siegelslopes(x=longi, y=peak_time)
    predicted_time = polyval(longi, (intercept, slope))
    squared_residuals = (peak_time - predicted_time) ** 2
    deviation = np.sqrt(np.sum(squared_residuals) / peak_time.size)

    return TimingParametersContainer(
        slope=slope / unit,
        intercept=intercept,
        deviation=deviation,
        slope_err=slope_err / unit,
        intercept_err=intercept_err,
    )

コード例 #4

0

ファイルを表示

ファイル: resit_fit.py プロジェクト: kimpenn/resistant-fit-norm-bibe-2020

def linear_resist_fit_robust_mix(x, y, p_default=0.5, verbose=False):
    """Resistant linear fit of ``y`` on ``x``: robust start, trimmed OLS refits.

    ``preprocess(x, y)`` supplies a selector for the points to use and the
    fraction ``p`` of them to keep.  A first line is fitted to all selected
    points with ``stats.siegelslopes``; the points with the smallest squared
    residuals form the initial subset.  Then, for at most 20 iterations, an
    ordinary least-squares line (``stats.linregress``) is fitted to the
    subset and the subset is re-chosen from the residuals, until the trimmed
    loss changes by less than 1e-5.

    Parameters
    ----------
    x, y : numpy.ndarray
        Predictor and response values (presumably 1-D arrays of equal
        length -- confirm against callers).
    p_default : float, optional
        Unused; kept for interface compatibility.  The fraction actually
        used is the one returned by ``preprocess``.
    verbose : bool, optional
        Print diagnostics to stdout.

    Returns
    -------
    abline_values : numpy.ndarray
        ``slope * x + intercept`` on the full ``x``, forced to 0 wherever
        ``x == 0``.
    slope, intercept : float
        Parameters of the last fitted line.

    Notes
    -----
    Calls ``np.seterr(all='raise')``, which changes NumPy's floating-point
    error handling globally and is not restored on return.
    """
    iter_limit = 20
    np.seterr(all='raise')

    select_list, p = preprocess(x, y)
    # NOTE(review): summing the selector gives the number of selected points
    # only if select_list is a boolean mask -- confirm against preprocess.
    n_select = np.sum(select_list)

    x_select = x[select_list].copy()
    y_select = y[select_list].copy()
    n_keep = int(n_select * p) + 1

    # Initial step: robust regression on all selected points so that outliers
    # do not drive the choice of the first subset.
    slope, intercept = stats.siegelslopes(y_select, x_select)
    square_list = np.square(y_select - (slope * x_select + intercept))
    sub_index = np.argsort(square_list)[0:n_keep]
    x_reg = x_select[sub_index]
    y_reg = y_select[sub_index]
    if verbose:
        print(f'[siegelslopes] slope:{slope},  intercept:{intercept}')

    loss_pre = float('Inf')
    for _ in range(iter_limit):
        fit = stats.linregress(x_reg, y_reg)
        slope, intercept = fit.slope, fit.intercept

        square_list = np.square(y_select - (slope * x_select + intercept))
        # NOTE(review): this slice starts at 1, so the single best-fitting
        # point is left out, unlike the initial step which starts at 0.
        # Kept as in the original; confirm whether it is intentional.
        sub_index = np.argsort(square_list)[1:n_keep]

        loss = np.sum(square_list[sub_index])
        delta_loss = abs(loss_pre - loss)
        if delta_loss < 0.00001:
            if verbose:
                print('convergence')
            break
        loss_pre = loss

        # Refit on the retained points in the next iteration.
        x_reg = x_select[sub_index]
        y_reg = y_select[sub_index]
    else:
        # Reached only when the loop ran out of iterations without converging.
        if verbose:
            print('[Resist Fit] Reach iteration limit')

    abline_values = slope * x + intercept
    # Points with x == 0 are mapped to 0 rather than to the intercept.
    abline_values[x == 0] = 0

    if verbose:
        print(f'[Resist Fit] slope: {slope}, intercept: {intercept}')
        print(f'depth(y_select): {np.sum(y_select)}, \n'
              f'depth(x_select): {np.sum(x_select)}, \n'
              f'depth(x): {np.sum(x)},\n'
              f'depth(norm): {np.sum(abline_values)},\n'
              f'depth(y): {np.sum(y)}\n'
              f'y_select/x_select: {np.sum(y_select) / np.sum(x_select)}')
    return abline_values, slope, intercept

コード例 #5

0

ファイルを表示

def _resistant_fit_linear(
        source: np.ndarray,
        target: np.ndarray,
        p: np.float32 = 0.75,
        verbose: bool = False,
        init_step: str = "ransac"
) -> Tuple[np.ndarray, np.float32, np.float32]:
    """
    Use resistant fit to normalize cell x (source) to the reference cell y (target).

    A robust initial line is fitted to the genes selected by ``_preprocess``.
    The genes with the smallest squared residuals form the biological feature
    set (BFS).  Ordinary least squares is then refitted on the BFS, and the
    BFS re-selected, for at most 20 iterations or until the trimmed loss
    changes by less than 1e-5.

    :param source: the gene expression of the cell x
    :param target: the gene expression of the cell y, reference cell
    :param p: the size of biological feature set, as a fraction of the
        selected genes
    :param verbose: verbose flag for debug
    :param init_step: robust estimator used for the initial line; one of
        ``'ransac'``, ``'siegel'`` or ``'theil'``
    :return:
        y_regression: the normalized 1d array (``slope * source + intercept``,
            set to 0 where ``source == 0``), cast to float32
        slope: the final slope of the regression, as float32
        intercept: the final intercept of the regression, as float32
    :raises NameError: if ``init_step`` is not one of the supported names

    Note: calls ``np.seterr(all='raise')``, which changes NumPy's
    floating-point error handling globally and is not restored on return.
    """
    ########################################
    # Select valid genes for regression
    ########################################
    iter_limit = 20
    np.seterr(all='raise')
    select_mask = _preprocess(source, target)
    n_select = np.sum(select_mask)

    x_select = source[select_mask].copy()  # Note that len(x_select) <= source
    y_select = target[select_mask].copy()
    n_keep = int(n_select * p) + 1

    ############################################################
    # Init step: robust regression on all selected genes
    ############################################################
    if init_step == "ransac":
        ransac = linear_model.RANSACRegressor(random_state=42)
        ransac.fit(x_select.copy().reshape(-1, 1),
                   y_select.copy().reshape(-1, 1))
        # coef_ / intercept_ are arrays here because y was passed as a column;
        # take the single element explicitly (float() on a non-0-d array is
        # deprecated in recent NumPy).
        slope = float(np.ravel(ransac.estimator_.coef_)[0])
        intercept = float(np.ravel(ransac.estimator_.intercept_)[0])
    elif init_step == "siegel":
        slope, intercept = stats.siegelslopes(y_select, x_select)
    elif init_step == "theil":
        slope, intercept, _, _ = stats.theilslopes(y_select, x_select)
    else:
        # NameError is kept (rather than ValueError) so existing callers that
        # catch it keep working.
        raise NameError(
            "init_step must be chosen from list ['ransac', 'siegel', 'theil']")

    square_list = np.square(y_select - (slope * x_select + intercept))
    sub_index = np.argsort(square_list)[0:n_keep]

    # Set Biological Feature Set (BFS)
    x_bfs = x_select[sub_index]
    y_bfs = y_select[sub_index]

    if verbose:
        logger.info(f'[Init EM step] slope:{slope},  intercept:{intercept}')

    ############################################################
    # Resistant Fit Regression on BFS
    ############################################################
    loss_pre = np.inf  # np.Inf was removed in NumPy 2.0
    for _ in range(iter_limit):
        # Linear regression on the current BFS
        fit = stats.linregress(x_bfs, y_bfs)
        slope, intercept = fit.slope, fit.intercept

        square_list = np.square(y_select - (slope * x_select + intercept))
        # NOTE(review): this slice starts at 1, so the single best-fitting
        # gene is left out, unlike the init step which starts at 0.  Kept as
        # in the original; confirm whether it is intentional.
        sub_index = np.argsort(square_list)[1:n_keep]

        loss = np.sum(square_list[sub_index])
        delta_loss = abs(loss_pre - loss)
        if delta_loss < 0.00001:
            if verbose:
                logger.info('convergence')
            break
        loss_pre = loss

        # Update the BFS for the next iteration of linear regression
        x_bfs = x_select[sub_index]
        y_bfs = y_select[sub_index]
    else:
        # Reached only when the loop ran out of iterations without converging.
        if verbose:
            logger.info('[Resist Fit] Reach iteration limit')

    ############################################################
    # Normalize the source cell based on the regression model
    ############################################################
    y_regression = slope * source + intercept
    # If x is zero then clip y to 0
    # TODO: x is unlikely to be zero.
    y_regression[source == 0] = 0

    if verbose:
        logger.info(f'[Resist Fit] slope: {slope}, intercept: {intercept}')
        logger.info(
            f'depth(y_select): {np.sum(y_select)}, \n'
            f'depth(x_select): {np.sum(x_select)}, \n'
            f'depth(x): {np.sum(source)},\n'
            f'depth(norm): {np.sum(y_regression)},\n'
            f'depth(y): {np.sum(target)}\n'
            f'y_select/x_select: {np.sum(y_select) / np.sum(x_select)}')

    return np.float32(y_regression), np.float32(slope), np.float32(intercept)

コード例 #6

0

ファイルを表示

    #    ax_2 = fig.add_subplot(132)
    ax_3 = fig.add_subplot(111)
    #    ax_1.plot(fpos_arr_l, hfd_arr_l)
    #    ax_2.plot(fpos_arr_r, hfd_arr_r)
    # Scatter all (fpos_arr, hfd_arr) measurements on the single axes.
    ax_3.scatter(fpos_arr, hfd_arr, color='green')

    # flip left side around so HFR values INCREASE with index
    # NOTE(review): the flip described above only appears in the commented-out
    # robust_line_fit calls below; the active siegelslopes calls use the
    # arrays as they are.
    print('fit')
    #    robust_right = robust_line_fit(fpos_arr_r, hfd_arr_r)
    #    robust_left = robust_line_fit(np.flipud(fpos_arr_l), np.flipud(hfd_arr_l))
    #    robust_best_pos = (robust_left[1]-robust_right[1])/(robust_right[0]-robust_left[0])
    #    print(f"Robust left  -> {robust_left}")
    #    print(f"Robust right -> {robust_right}")
    #    print(f"Robust intersection/best focus -> {robust_best_pos}")

    # Fit one line to each side: hfd as a function of fpos.  The results are
    # indexed below as [0] = slope and [1] = intercept (presumably
    # scipy.stats.siegelslopes -- confirm against the file's imports).
    siegel_left_fit = siegelslopes(hfd_arr_l, fpos_arr_l)
    siegel_right_fit = siegelslopes(hfd_arr_r, fpos_arr_r)
    # Position at which each fitted line crosses zero (-intercept / slope).
    # These two values are not used in the visible part of the function.
    siegel_left_zero = -siegel_left_fit[1] / siegel_left_fit[0]
    siegel_right_zero = -siegel_right_fit[1] / siegel_right_fit[0]
    # Position where the two fitted lines intersect:
    # (b_left - b_right) / (m_right - m_left).
    siegel_best_pos = (siegel_left_fit[1] - siegel_right_fit[1]) / (
        siegel_right_fit[0] - siegel_left_fit[0])
    logging.info(f'siegel left  fit = {siegel_left_fit}')
    logging.info(f'siegel right fit = {siegel_right_fit}')
    logging.info(f'siegel best pos  = {siegel_best_pos}')

    # Draw each fitted line over its own side of fpos_arr, extended 5 samples
    # past index midx, and mark the intersection with a red vertical line.
    ax_3.plot(fpos_arr[:midx + 5],
              siegel_left_fit[0] * fpos_arr[:midx + 5] + siegel_left_fit[1])
    ax_3.plot(fpos_arr[midx - 5:],
              siegel_right_fit[0] * fpos_arr[midx - 5:] + siegel_right_fit[1])
    ax_3.axvline(siegel_best_pos, color='red')

コード例 #7

0

ファイルを表示

ファイル: scipy-stats-siegelslopes-1.py プロジェクト: dacsgb/Comp-Methods

import matplotlib.pyplot as plt
import numpy as np  # required: the script uses np.linspace / np.random below
from scipy import stats

# Noisy samples of the line y = x, with two groups of outliers added.
x = np.linspace(-5, 5, num=150)
y = x + np.random.normal(size=x.size)
y[11:15] += 10  # add outliers
y[-5:] -= 7

# Compute the slope and intercept.  For comparison, also compute the
# least-squares fit with `linregress`.  Note the argument order:
# siegelslopes takes (y, x) while linregress takes (x, y).

res = stats.siegelslopes(y, x)
lsq_res = stats.linregress(x, y)

# Plot the results. The Siegel regression line is shown in red. The green
# line shows the least-squares fit for comparison.  Both results are indexed
# as [0] = slope, [1] = intercept.

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y, 'b.')
ax.plot(x, res[1] + res[0] * x, 'r-')
ax.plot(x, lsq_res[1] + lsq_res[0] * x, 'g-')
plt.show()