Esempio n. 1
0
def test_bootstrap_seed(random):
    """Check that a fixed seed makes bootstrap resamples reproducible."""
    sample = np.random.randn(50)
    # Two independent calls that share one seed must agree element-wise.
    first, second = (algo.bootstrap(sample, seed=42) for _ in range(2))
    assert_array_equal(first, second)
Esempio n. 2
0
def test_bootstrap(random):
    """Check that bootstrapping a constant array always yields that constant."""
    n_resamples = 5
    constant = np.ones(10)
    expected = np.ones(n_resamples)

    # First with the default statistic, then with an explicit one.
    assert_array_equal(algo.bootstrap(constant, n_boot=n_resamples), expected)
    assert_array_equal(
        algo.bootstrap(constant, n_boot=n_resamples, func=np.median),
        expected)
Esempio n. 3
0
def test_bootstrap_length(random):
    """Check that the bootstrap output holds one entry per resample."""
    sample = np.random.randn(1000)

    # With no n_boot given, this test expects 10000 resamples.
    assert len(algo.bootstrap(sample)) == 10000

    # An explicit n_boot sets the output length.
    requested = 100
    assert len(algo.bootstrap(sample, n_boot=requested)) == requested
Esempio n. 4
0
def test_bootstrap_axis(random):
    """Test the ``axis`` kwarg of the bootstrap function.

    Without ``axis`` each resample is expected to reduce to a single value;
    with ``axis=0`` one value per column of the input is expected.
    """
    x = np.random.randn(10, 20)
    n_boot = 100

    out_default = algo.bootstrap(x, n_boot=n_boot)
    assert out_default.shape == (n_boot, )

    out_axis = algo.bootstrap(x, n_boot=n_boot, axis=0)
    # Compare with ``==``: ``assert shape, expected`` would use the tuple as
    # the failure message and pass for any non-empty shape.
    assert out_axis.shape == (n_boot, x.shape[1])
Esempio n. 5
0
def test_bootstrap_reproducibility(random):
    """Check that equal seeds give equal bootstrap results."""
    sample = np.random.randn(50)

    def resample(**seed_kwarg):
        return algo.bootstrap(sample, **seed_kwarg)

    assert_array_equal(resample(seed=100), resample(seed=100))

    # Deprecated keyword: drop this part once ``random_seed`` is removed.
    with pytest.warns(UserWarning):
        legacy_first = resample(random_seed=100)
        legacy_second = resample(random_seed=100)
        assert_array_equal(legacy_first, legacy_second)
Esempio n. 6
0
def test_bootstrap_units(random):
    """Check that resampling by unit widens the bootstrap distribution."""
    n_units, per_unit = 10, 5

    noise = np.random.randn(n_units * per_unit)
    unit_ids = np.repeat(range(n_units), per_unit)
    # One random offset per unit, broadcast to every observation of that unit.
    unit_offsets = np.random.normal(0, 2, n_units)
    clustered = noise + unit_offsets[unit_ids]

    shared_seed = 77
    plain = algo.bootstrap(clustered, seed=shared_seed)
    by_unit = algo.bootstrap(clustered, units=unit_ids, seed=shared_seed)
    assert by_unit.std() > plain.std()
Esempio n. 7
0
def test_bootstrap_range(random):
    """Check that bootstrap statistics stay inside the range of the data."""
    sample = np.random.randn(1000)
    lowest, highest = sample.min(), sample.max()

    boots = algo.bootstrap(sample)

    assert lowest <= boots.min()
    assert highest >= boots.max()
Esempio n. 8
0
def bootstrapped_ci(x, func, n_boot, which_ci=95, axis=None):
    """
    Estimate a confidence interval (CI) for a statistic by bootstrapping.

    Parameters
    ----------
    x : array-like
        The sample to resample.
    func : callable (function object)
        The statistic to evaluate on every resample
        (for example np.mean, np.median, ...).
    n_boot : int
        Number of bootstrap resamples to draw.
    which_ci : float, optional
        Size of the confidence interval, forwarded to ``seaborn.utils.ci``
        as ``which``. The default is 95.
    axis : int or None, optional
        Forwarded both to ``seaborn.algorithms.bootstrap`` and to
        ``seaborn.utils.ci``. The default is None.

    Returns
    -------
    object
        Whatever ``seaborn.utils.ci`` returns for the bootstrap distribution;
        presumably the lower and upper limits of the interval (confirm
        against the installed seaborn version).

    """
    # Imported lazily so seaborn is only required when this helper is used.
    from seaborn import algorithms, utils

    resampled_stats = algorithms.bootstrap(x,
                                           func=func,
                                           n_boot=n_boot,
                                           axis=axis)
    return utils.ci(resampled_stats, which=which_ci, axis=axis)
Esempio n. 9
0
def test_nanaware_func_warning(random):
    """Check that "ptp" on data with a NaN warns and propagates the NaN."""
    sample = np.random.normal(size=10)
    sample[0] = np.nan

    with pytest.warns(UserWarning, match="Data contain nans but"):
        result = algo.bootstrap(sample, func="ptp")

    assert np.isnan(result).any()
Esempio n. 10
0
		def bootstrapped_cis(vals):
			"""Return the bootstrapped CI of ``vals`` as a low/high Series.

			Inputs with at most one value return the enclosing scope's
			``null_ci`` instead of being bootstrapped.
			"""
			if len(vals) > 1:
				resamples = bootstrap(vals, func=func, n_boot=n_boot, seed=seed)
				limits = utils.ci(resamples, ci)
				return pd.Series(limits, ["low", "high"])

			return null_ci
Esempio n. 11
0
def test_bootstrap_multiarg(random):
    """Check that bootstrap accepts several input arrays at once."""
    n_rows = 10
    x = np.tile([1, 10], (n_rows, 1))
    y = np.tile([5, 5], (n_rows, 1))

    def column_max(x, y):
        # Stack both resampled inputs and take the per-column maximum.
        return np.concatenate((x, y), axis=0).max(axis=0)

    n_boot = 2
    observed = algo.bootstrap(x, y, n_boot=n_boot, func=column_max)
    expected = np.tile([5, 10], (n_boot, 1))
    assert_array_equal(observed, expected)
Esempio n. 12
0
def test_bootstrap_ols(random):
    """Check bootstrapped OLS fits: shape, and spread growing with noise."""
    def ols_fit(X, y):
        # Normal equations: (X'X)^-1 X'y.
        gram_inverse = np.linalg.inv(X.T.dot(X))
        return gram_inverse.dot(X.T).dot(y)

    n_obs, n_boot = 50, 500
    weights = [2, 4, 0, 3, 5]

    # Four random predictors plus a constant column; draw order matters
    # because the RNG is seeded by the fixture.
    X = np.column_stack((np.random.randn(n_obs, 4), np.ones(n_obs)))
    signal = np.dot(X, weights)
    y_noisy = signal + np.random.randn(n_obs) * 20
    y_lownoise = signal + np.random.randn(n_obs)

    boot_kws = dict(n_boot=n_boot, func=ols_fit)
    w_boot_noisy = algo.bootstrap(X, y_noisy, **boot_kws)
    w_boot_lownoise = algo.bootstrap(X, y_lownoise, **boot_kws)

    expected_shape = (n_boot, len(weights))
    assert w_boot_noisy.shape == expected_shape
    assert w_boot_lownoise.shape == expected_shape
    assert w_boot_noisy.std() > w_boot_lownoise.std()
  def get_params(self, grid):
    """Fit a least-squares line and bootstrap its coefficients.

    Adapted from seaborn. A column of ones is prepended to both ``self.x``
    and ``grid`` so the first coefficient acts as the intercept.

    Args:
      grid: values at which the fitted line is evaluated.

    Returns:
      ``(yhat, None)`` when ``self.ci`` is None, where ``yhat`` is the
      prediction on ``grid``. Otherwise ``(beta, beta_boots)``: the fitted
      coefficients and the transposed bootstrap distribution of them.

    NOTE(review): the two branches return different quantities in the first
    slot (predictions vs. coefficients) -- confirm with callers that this is
    intended.
    """

    def least_squares(x_, y_):
      return np.linalg.pinv(x_).dot(y_)

    design = np.c_[np.ones(len(self.x)), self.x]
    target = self.y
    grid_design = np.c_[np.ones(len(grid)), grid]

    coefficients = least_squares(design, target)
    predictions = grid_design.dot(coefficients)

    if self.ci is not None:
      boot_coefficients = sns_algos.bootstrap(
          design,
          target,
          func=least_squares,
          n_boot=self.n_boot,
          units=self.units,  # pytype: disable=attribute-error
          seed=self.seed)
      return coefficients, boot_coefficients.T

    return predictions, None
Esempio n. 14
0
def test_bootstrap_string_func():
    """Check that a method name and the numpy function give equal results."""
    sample = np.random.randn(100)

    for method_name, numpy_func in (("mean", np.mean), ("std", np.std)):
        by_name = algo.bootstrap(sample, func=method_name, seed=0)
        by_callable = algo.bootstrap(sample, func=numpy_func, seed=0)
        assert np.array_equal(by_name, by_callable)

    # An unknown method name must be rejected.
    with pytest.raises(AttributeError):
        algo.bootstrap(sample, func="not_a_method_name")
Esempio n. 15
0
    def fit_logx(self, grid):
        """Regress log(y) on log(x) and predict on ``grid``.

        The fitted coefficients are stored on ``self.betas``. Predictions are
        exponentiated before being returned.

        Returns ``(yhat, None)`` when ``self.ci`` is None, otherwise
        ``(yhat, yhat_boots)`` with the bootstrapped predictions.
        """
        def reg_func(_x, _y):
            # Column 0 is the intercept; only the predictor column is logged.
            log_design = np.c_[_x[:, 0], np.log(_x[:, 1])]
            log_response = np.log(_y)
            return np.linalg.pinv(log_design).dot(log_response)

        design = np.c_[np.ones(len(self.x)), self.x]
        response = self.y
        log_grid = np.c_[np.ones(len(grid)), np.log(grid)]

        self.betas = reg_func(design, response)
        prediction = np.exp(log_grid.dot(self.betas))
        if self.ci is None:
            return prediction, None

        boot_betas = algo.bootstrap(design,
                                    response,
                                    func=reg_func,
                                    n_boot=self.n_boot,
                                    units=self.units)
        boot_prediction = np.exp(log_grid.dot(boot_betas.T).T)
        return prediction, boot_prediction
Esempio n. 16
0
def test_nanaware_func_auto(random):
    """Check that "mean" on data with a NaN gives a NaN-free result."""
    sample = np.random.normal(size=10)
    sample[0] = np.nan

    result = algo.bootstrap(sample, func="mean")

    assert not np.isnan(result).any()
Esempio n. 17
0
def test_bootstrap_arglength():
    """Check that inputs of different lengths raise ValueError."""
    shorter, longer = np.arange(5), np.arange(10)
    with pytest.raises(ValueError):
        algo.bootstrap(shorter, longer)
Esempio n. 18
0
def _make_rlm_estimator(yy_fit, xx_fit):
    """Build the bootstrap statistic: an ``sm.RLM`` (Huber T) line fit."""

    def estimator(inds, YY=yy_fit, XX=xx_fit):
        # ``inds`` are resampled row indices into the fitted arrays. Callers
        # read params[0] as the intercept and params[1] as the slope.
        XX = sm.add_constant(XX[inds])
        res = sm.RLM(YY[inds], XX, M=sm.robust.norms.HuberT()).fit()
        return res.params

    return estimator


def _bootstrap_rlm_fit(yy_fit, xx_fit, n_boot):
    """Bootstrap the robust line fit of ``yy_fit`` on ``xx_fit``."""
    return bootstrap(np.arange(len(yy_fit)),
                     func=_make_rlm_estimator(yy_fit, xx_fit),
                     n_boot=n_boot)


def _append_fit_summary(boot, slopes, slopes_err, intercepts, intercepts_err):
    """Append mean and std of bootstrapped slope/intercept to the lists."""
    slopes.append(np.mean(boot[:, 1], axis=0))
    slopes_err.append(np.std(boot[:, 1], axis=0))
    intercepts.append(np.mean(boot[:, 0], axis=0))
    intercepts_err.append(np.std(boot[:, 0], axis=0))


def _slope_conf_band(boot_res, X):
    """Return the 5th/95th percentile of the bootstrapped lines along ``X``."""
    lines = [[res[0] + res[1] * X] for res in boot_res]
    lines = np.vstack(lines)
    return np.percentile(lines, (5, 95), axis=0)


def _scale_height_error(slope, slope_err):
    """Propagate a slope error to the scale height 1/slope."""
    return np.abs(1 / slope * slope_err / slope)


def _open_scale_height_figure(data, mask, xx, yy, zz, min_lat, xlim, ylim):
    """Create the figure and draw everything that does not depend on a fit."""
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax2 = ax.twiny()

    ax.scatter(xx, yy, color="k", alpha=0.8)
    if zz is not None:
        ax.scatter(xx, zz, color="grey", alpha=0.8)

    ax.set_xlabel(r"$\tan$(b)", fontsize=12)
    ax.set_ylabel(r"$\log$($H\alpha$ Intensity / R)", fontsize=12)

    ax.set_title(r"${0:.1f} < l < {1:.1f}$".format(
        data["GAL-LON"][mask].min(),
        data["GAL-LON"][mask].max()),
                 fontsize=14)

    ax2.plot(np.degrees(np.arctan(xlim)),
             np.log([0.1, 0.1]),
             ls=":",
             lw=1,
             color="k",
             label="0.1 R")
    # NOTE: the legend text is fixed at 5 degrees even when min_lat differs.
    ax2.fill_between([-min_lat, min_lat] * u.deg,
                     [ylim[0], ylim[0]], [ylim[1], ylim[1]],
                     color=pal[1],
                     alpha=0.1,
                     label=r"$|b| < 5\degree$")
    return fig, ax, ax2


def _save_scale_height_figure(fig, ax, ax2, xlim, ylim, figure_name):
    """Apply limits and legends, write the figure to disk and close it."""
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    ax2.set_xlabel(r"$b$ (deg)", fontsize=12)
    ax2.set_xlim(np.degrees(np.arctan(xlim)))

    ax.legend(fontsize=12, loc=1)
    ax2.legend(fontsize=12, loc=2)

    plt.tight_layout()

    plt.savefig(figure_name, dpi=300)
    plt.close(fig)


def fit_scale_heights(data,
                      masks,
                      min_lat=None,
                      max_lat=None,
                      deredden=False,
                      fig_names=None,
                      return_smoothed=False,
                      smoothed_width=None,
                      xlim=None,
                      ylim=None,
                      robust=True,
                      n_boot=10000):
    """
    Fits scale height data and returns slopes

    For every mask, log(intensity) is fitted linearly against tan(b),
    separately for positive and negative latitudes inside
    ``(min_lat, max_lat)``.

    Parameters
    ----------
    data: `skySurvey`
        WHAM skySurvey object of full sky (requires track keyword), or spiral arm section
    masks: `list like`
        longitude masks to use
    min_lat:   `u.Quantity`
        min latitude to fit; bare numbers are taken as degrees, default 5 deg
    max_lat:   `u.Quantity`
        max latitude to fit; bare numbers are taken as degrees, default 35 deg
    deredden: `bool`
        if True, also fits dereddened slopes
    fig_names: `str`
        if provided, saves figures following this name
    return_smoothed: `bool`
        if True, returns smoothed longitude and slope estimates
    smoothed_width: `u.Quantity`
        width to smooth data to in longitude; bare numbers are taken as
        degrees, default 5 deg
    xlim: `list like`
        x limits of the saved figures, default (-0.9, 0.9)
    ylim: `list like`
        y limits of the saved figures, default (-4.6, 3.2)
    robust: `bool`
        if True, fits with sm.RLM (Huber T norm) and bootstraps the fit;
        if False, uses stats.siegelslopes (or stats.theilslopes when the
        former is unavailable)
    n_boot: `int`
        only if robust = True
        number of bootstrap resamples

    Returns
    -------
    results: `dict`
        arrays with one entry per mask: "median_longitude", "slopes_pos",
        "slopes_neg", "intercept_pos", "intercept_neg"; "*_err" entries when
        robust; "median_distance" and "*_dr" entries when deredden;
        "smoothed_*" entries when return_smoothed. In the robust case a mask
        with fewer than 5 usable points on either side gets NaN entries.
    """

    # Default values; bare numbers are interpreted as degrees.
    if min_lat is None:
        min_lat = 5 * u.deg
    elif not hasattr(min_lat, "unit"):
        min_lat = min_lat * u.deg

    if max_lat is None:
        max_lat = 35 * u.deg
    elif not hasattr(max_lat, "unit"):
        max_lat = max_lat * u.deg

    if smoothed_width is None:
        smoothed_width = 5 * u.deg
    elif not hasattr(smoothed_width, "unit"):
        smoothed_width = smoothed_width * u.deg

    # Only used when figures are written.
    if xlim is None:
        xlim = np.array([-0.9, 0.9])
    if ylim is None:
        ylim = np.array([-4.6, 3.2])

    # The fit is done in tan(b), so convert the latitude limits once.
    y_min = np.tan(min_lat)
    y_max = np.tan(max_lat)

    if not robust:
        if hasattr(stats, "siegelslopes"):
            slope_estimator = stats.siegelslopes
        else:
            logging.warning(
                "Installed version of scipy does not have the siegelslopes method in scipy.stats!"
            )
            slope_estimator = stats.theilslopes

    #initialize data arrays

    slopes_pos = []
    slopes_neg = []
    slopes_pos_dr = []
    slopes_neg_dr = []
    intercept_pos = []
    intercept_neg = []
    intercept_pos_dr = []
    intercept_neg_dr = []
    slopes_pos_err = []
    slopes_neg_err = []
    slopes_pos_dr_err = []
    slopes_neg_dr_err = []
    intercept_pos_err = []
    intercept_neg_err = []
    intercept_pos_dr_err = []
    intercept_neg_dr_err = []
    median_longitude = []
    median_distance = []

    for ell2 in range(len(masks)):
        mask = masks[ell2]
        xx = data["tan(b)"][mask]
        yy = np.log(data["INTEN"][mask])
        nan_mask = np.isnan(yy) | np.isinf(yy)
        pos_sel = (xx > y_min) & (xx < y_max) & ~nan_mask
        neg_sel = (xx < -y_min) & (xx > -y_max) & ~nan_mask

        zz = None
        if deredden:
            zz = np.log(data["INTEN_DERED"][mask])
            nan_mask_z = np.isnan(zz) | np.isinf(zz)
            pos_sel_dr = (xx > y_min) & (xx < y_max) & ~nan_mask_z
            neg_sel_dr = (xx < -y_min) & (xx > -y_max) & ~nan_mask_z

        median_longitude.append(np.median(data["GAL-LON"][mask]))
        if deredden:
            median_distance.append(np.median(data["DISTANCE"][mask]))

        if not robust:
            siegel_result_pos = slope_estimator(yy[pos_sel], xx[pos_sel])
            siegel_result_neg = slope_estimator(yy[neg_sel], xx[neg_sel])

            slopes_pos.append(siegel_result_pos[0])
            slopes_neg.append(siegel_result_neg[0])
            intercept_pos.append(siegel_result_pos[1])
            intercept_neg.append(siegel_result_neg[1])

            if deredden:
                siegel_result_pos_dr = slope_estimator(zz[pos_sel_dr],
                                                       xx[pos_sel_dr])
                siegel_result_neg_dr = slope_estimator(zz[neg_sel_dr],
                                                       xx[neg_sel_dr])
                slopes_pos_dr.append(siegel_result_pos_dr[0])
                slopes_neg_dr.append(siegel_result_neg_dr[0])
                intercept_pos_dr.append(siegel_result_pos_dr[1])
                intercept_neg_dr.append(siegel_result_neg_dr[1])

            if fig_names is not None:
                figure_name = "{0}_{1}.png".format(fig_names, ell2)
                fig, ax, ax2 = _open_scale_height_figure(
                    data, mask, xx, yy, zz, min_lat, xlim, ylim)

                line_xx = np.linspace(y_min, y_max, 10)
                line_yy_pos = siegel_result_pos[
                    0] * line_xx + siegel_result_pos[1]
                line_yy_neg = siegel_result_neg[
                    0] * -line_xx + siegel_result_neg[1]
                ax.plot(line_xx,
                        line_yy_pos,
                        color="r",
                        lw=3,
                        alpha=0.9,
                        label=r"$H_{{n_e^2}} = {0:.2f} D$".format(
                            1 / -siegel_result_pos[0]))
                ax.plot(-line_xx,
                        line_yy_neg,
                        color="b",
                        lw=3,
                        alpha=0.9,
                        label=r"$H_{{n_e^2}} = {0:.2f} D$".format(
                            1 / siegel_result_neg[0]))

                if deredden:
                    line_yy_pos_dr = siegel_result_pos_dr[
                        0] * line_xx + siegel_result_pos_dr[1]
                    line_yy_neg_dr = siegel_result_neg_dr[
                        0] * -line_xx + siegel_result_neg_dr[1]
                    ax.plot(line_xx,
                            line_yy_pos_dr,
                            color="r",
                            lw=3,
                            alpha=0.9,
                            ls="--",
                            label=r"Dered: $H_{{n_e^2}} = {0:.2f} D$".format(
                                1 / -siegel_result_pos_dr[0]))
                    ax.plot(-line_xx,
                            line_yy_neg_dr,
                            color="b",
                            lw=3,
                            alpha=0.9,
                            ls="--",
                            label=r"Dered: $H_{{n_e^2}} = {0:.2f} D$".format(
                                1 / siegel_result_neg_dr[0]))

                _save_scale_height_figure(fig, ax, ax2, xlim, ylim,
                                          figure_name)
            continue

        # Robust fit: bootstrap an RLM line fit on each side of the plane.
        yy_pos, xx_pos = yy[pos_sel], xx[pos_sel]
        yy_neg, xx_neg = yy[neg_sel], xx[neg_sel]

        if (len(yy_pos) < 5) or (len(yy_neg) < 5):
            # Too few points to fit. Record NaN in every per-mask series so
            # all outputs stay aligned with median_longitude; the smoothing
            # step below uses nanpercentile and skips them.
            placeholders = [
                slopes_pos, slopes_neg, slopes_pos_err, slopes_neg_err,
                intercept_pos, intercept_neg, intercept_pos_err,
                intercept_neg_err
            ]
            if deredden:
                placeholders += [
                    slopes_pos_dr, slopes_neg_dr, slopes_pos_dr_err,
                    slopes_neg_dr_err, intercept_pos_dr, intercept_neg_dr,
                    intercept_pos_dr_err, intercept_neg_dr_err
                ]
            for series in placeholders:
                series.append(np.nan)
            continue

        boot_pos = _bootstrap_rlm_fit(yy_pos, xx_pos, n_boot)
        boot_neg = _bootstrap_rlm_fit(yy_neg, xx_neg, n_boot)
        _append_fit_summary(boot_pos, slopes_pos, slopes_pos_err,
                            intercept_pos, intercept_pos_err)
        _append_fit_summary(boot_neg, slopes_neg, slopes_neg_err,
                            intercept_neg, intercept_neg_err)

        if deredden:
            boot_pos_dr = _bootstrap_rlm_fit(zz[pos_sel_dr], xx[pos_sel_dr],
                                             n_boot)
            boot_neg_dr = _bootstrap_rlm_fit(zz[neg_sel_dr], xx[neg_sel_dr],
                                             n_boot)
            _append_fit_summary(boot_pos_dr, slopes_pos_dr, slopes_pos_dr_err,
                                intercept_pos_dr, intercept_pos_dr_err)
            _append_fit_summary(boot_neg_dr, slopes_neg_dr, slopes_neg_dr_err,
                                intercept_neg_dr, intercept_neg_dr_err)

        if fig_names is not None:
            figure_name = "{0}_{1}.png".format(fig_names, ell2)
            fig, ax, ax2 = _open_scale_height_figure(data, mask, xx, yy, zz,
                                                     min_lat, xlim, ylim)

            line_xx = np.linspace(y_min, y_max, 100)

            line_yy_pos = slopes_pos[-1] * line_xx + intercept_pos[-1]
            line_yy_neg = slopes_neg[-1] * -line_xx + intercept_neg[-1]
            line_yy_pos_range = _slope_conf_band(boot_pos, line_xx)
            line_yy_neg_range = _slope_conf_band(boot_neg, -line_xx)

            ax.plot(line_xx,
                    line_yy_pos,
                    color="r",
                    lw=3,
                    alpha=0.9,
                    label=r"$H_{{n_e^2}} = ({0:.2f} \pm {1:.2f}) D$".format(
                        1 / -slopes_pos[-1],
                        _scale_height_error(slopes_pos[-1],
                                            slopes_pos_err[-1])))
            ax.fill_between(line_xx,
                            line_yy_pos_range[0],
                            line_yy_pos_range[1],
                            color="r",
                            alpha=0.2)
            # The negative-side label uses the negative-side slope and error.
            ax.plot(-line_xx,
                    line_yy_neg,
                    color="b",
                    lw=3,
                    alpha=0.9,
                    label=r"$H_{{n_e^2}} = ({0:.2f} \pm {1:.2f}) D$".format(
                        1 / slopes_neg[-1],
                        _scale_height_error(slopes_neg[-1],
                                            slopes_neg_err[-1])))
            ax.fill_between(-line_xx,
                            line_yy_neg_range[0],
                            line_yy_neg_range[1],
                            color="b",
                            alpha=0.2)

            if deredden:
                line_yy_pos_dr = slopes_pos_dr[
                    -1] * line_xx + intercept_pos_dr[-1]
                line_yy_neg_dr = slopes_neg_dr[
                    -1] * -line_xx + intercept_neg_dr[-1]
                line_yy_pos_range_dr = _slope_conf_band(boot_pos_dr, line_xx)
                line_yy_neg_range_dr = _slope_conf_band(boot_neg_dr, -line_xx)

                ax.plot(
                    line_xx,
                    line_yy_pos_dr,
                    color="r",
                    lw=3,
                    alpha=0.9,
                    ls="--",
                    label=r"Dered: $H_{{n_e^2}} = ({0:.2f} \pm {1:.2f}) D$".
                    format(
                        1 / -slopes_pos_dr[-1],
                        _scale_height_error(slopes_pos_dr[-1],
                                            slopes_pos_dr_err[-1])))
                ax.fill_between(line_xx,
                                line_yy_pos_range_dr[0],
                                line_yy_pos_range_dr[1],
                                color="r",
                                alpha=0.2)
                ax.plot(
                    -line_xx,
                    line_yy_neg_dr,
                    color="b",
                    lw=3,
                    alpha=0.9,
                    ls="--",
                    label=r"Dered: $H_{{n_e^2}} = ({0:.2f} \pm {1:.2f}) D$".
                    format(
                        1 / slopes_neg_dr[-1],
                        _scale_height_error(slopes_neg_dr[-1],
                                            slopes_neg_dr_err[-1])))
                ax.fill_between(-line_xx,
                                line_yy_neg_range_dr[0],
                                line_yy_neg_range_dr[1],
                                color="b",
                                alpha=0.2)

            _save_scale_height_figure(fig, ax, ax2, xlim, ylim, figure_name)

    # Assemble the output once, after all masks have been processed.
    results = {
        "median_longitude": np.array(median_longitude),
        "slopes_pos": np.array(slopes_pos),
        "slopes_neg": np.array(slopes_neg),
        "intercept_pos": np.array(intercept_pos),
        "intercept_neg": np.array(intercept_neg)
    }
    if robust:
        results["slopes_pos_err"] = np.array(slopes_pos_err)
        results["slopes_neg_err"] = np.array(slopes_neg_err)
        results["intercept_pos_err"] = np.array(intercept_pos_err)
        results["intercept_neg_err"] = np.array(intercept_neg_err)

    if deredden:
        results["median_distance"] = np.array(median_distance)
        results["slopes_pos_dr"] = np.array(slopes_pos_dr)
        results["slopes_neg_dr"] = np.array(slopes_neg_dr)
        results["intercept_pos_dr"] = np.array(intercept_pos_dr)
        results["intercept_neg_dr"] = np.array(intercept_neg_dr)
        if robust:
            results["slopes_pos_dr_err"] = np.array(slopes_pos_dr_err)
            results["slopes_neg_dr_err"] = np.array(slopes_neg_dr_err)
            results["intercept_pos_dr_err"] = np.array(intercept_pos_dr_err)
            results["intercept_neg_dr_err"] = np.array(intercept_neg_dr_err)

    if return_smoothed:
        results["smoothed_longitude"] = np.arange(np.min(median_longitude),
                                                  np.max(median_longitude),
                                                  0.25)
        if deredden:
            distance_interp = interp1d(median_longitude, median_distance)
            results["smoothed_distance"] = distance_interp(
                results["smoothed_longitude"])

        n_smoothed = len(results["smoothed_longitude"])
        half_width = smoothed_width.value / 2
        # Rows hold the 10th, 50th and 90th percentile of the slopes that
        # fall inside the longitude window around each smoothed longitude.
        smoothed_slope_pos_ha = np.zeros((3, n_smoothed))
        smoothed_slope_neg_ha = np.zeros((3, n_smoothed))
        if deredden:
            smoothed_slope_pos_ha_dr = np.zeros((3, n_smoothed))
            smoothed_slope_neg_ha_dr = np.zeros((3, n_smoothed))

        for ell, lon in enumerate(results["smoothed_longitude"]):
            window = ((median_longitude <= lon + half_width)
                      & (median_longitude > lon - half_width))
            smoothed_slope_pos_ha[:, ell] = np.nanpercentile(
                results["slopes_pos"][window], (10, 50, 90))
            smoothed_slope_neg_ha[:, ell] = np.nanpercentile(
                results["slopes_neg"][window], (10, 50, 90))
            if deredden:
                smoothed_slope_pos_ha_dr[:, ell] = np.nanpercentile(
                    results["slopes_pos_dr"][window], (10, 50, 90))
                smoothed_slope_neg_ha_dr[:, ell] = np.nanpercentile(
                    results["slopes_neg_dr"][window], (10, 50, 90))

        results["smoothed_slopes_pos"] = smoothed_slope_pos_ha
        results["smoothed_slopes_neg"] = smoothed_slope_neg_ha
        if deredden:
            results["smoothed_slopes_pos_dr"] = smoothed_slope_pos_ha_dr
            results["smoothed_slopes_neg_dr"] = smoothed_slope_neg_ha_dr

    return results
Esempio n. 19
0
def regplot(x,
            y,
            data=None,
            model=None,
            ci=95.,
            scatter_color=None,
            model_color='k',
            ax=None,
            scatter_kws=None,
            regplot_kws=None,
            cmap=None,
            cax=None,
            clabel=None,
            xlabel=False,
            ylabel=False,
            colorbar=False,
            **kwargs):
    """Scatter ``y`` against ``x`` and overlay a fitted line with a CI band.

    The model is fit on a design matrix made of a column of ones and the
    ``x`` values, evaluated on a 100-point grid spanning ``x``, and the
    uncertainty band is taken from 1000 bootstrap refits.

    Parameters
    ----------
    x, y
        The data vectors, or keys into ``data`` when ``data`` is given.
    data
        Optional container indexed as ``data[x]`` / ``data[y]``.
    model
        Callable used as ``model(y, X, **kwargs).fit()``; the fitted object
        must provide ``predict``. Defaults to ``statsmodels.api.OLS``.
    ci
        Size of the interval passed to ``seaborn.utils.ci``.
    scatter_color
        Passed as ``c`` to ``ax.scatter``.
    model_color
        Color of the fitted line and of the shaded band.
    ax
        Axes to draw on; a new figure is created when omitted.
    scatter_kws, regplot_kws
        Extra keyword arguments for ``ax.scatter`` and ``ax.plot``.
    cmap
        Colormap forwarded to ``ax.scatter`` when given and when
        ``scatter_kws`` does not already set ``cmap``.
    cax, clabel, colorbar
        When ``colorbar`` is truthy a colorbar is drawn for the scatter
        (in ``cax`` if provided) and labelled with ``clabel`` if set.
    xlabel, ylabel
        A string is used as the axis label; any other truthy value labels
        the axis with ``x`` / ``y`` themselves.
    **kwargs
        Forwarded to ``model`` on every fit.

    Returns
    -------
    The fitted result of ``model(y, X, **kwargs).fit()`` on the full data.
    """
    if model is None:
        import statsmodels.api as sm
        model = sm.OLS
    from seaborn import utils
    from seaborn import algorithms as algo

    # Work on private copies: avoids shared mutable defaults and guarantees
    # the caller's dictionaries are never modified.
    scatter_kws = dict(scatter_kws) if scatter_kws else {}
    regplot_kws = dict(regplot_kws) if regplot_kws else {}
    if cmap is not None:
        # An explicit entry in scatter_kws wins over the cmap argument.
        scatter_kws.setdefault("cmap", cmap)

    if ax is None:
        _, ax = plt.subplots()
    if data is None:
        _x = x
        _y = y
    else:
        _x = data[x]
        _y = data[y]
    grid = np.linspace(_x.min(), _x.max(), 100)

    # Design matrices with an explicit intercept column.
    X = np.c_[np.ones(len(_x)), _x]
    G = np.c_[np.ones(len(grid)), grid]

    results = model(_y, X, **kwargs).fit()

    def reg_func(xx, yy):
        # Refit on one bootstrap resample and predict on the common grid.
        return model(yy, xx, **kwargs).fit().predict(G)

    yhat = results.predict(G)
    yhat_boots = algo.bootstrap(X, _y, func=reg_func, n_boot=1000, units=None)
    err_bands = utils.ci(yhat_boots, ci, axis=0)
    ax.plot(grid, yhat, color=model_color, **regplot_kws)
    sc = ax.scatter(_x, _y, c=scatter_color, **scatter_kws)
    ax.fill_between(grid, *err_bands, facecolor=model_color, alpha=.15)
    if colorbar:
        cb = plt.colorbar(mappable=sc, cax=cax, ax=ax)
        cb.ax.yaxis.set_ticks_position('right')
        if clabel:
            cb.set_label(clabel)

    if xlabel:
        if isinstance(xlabel, str):
            ax.set_xlabel(xlabel)
        else:
            ax.set_xlabel(x)
    if ylabel:
        if isinstance(ylabel, str):
            ax.set_ylabel(ylabel)
        else:
            ax.set_ylabel(y)
    return results
Esempio n. 20
0
    def estimate_statistic(self, estimator, ci, n_boot):
        """Compute a point estimate and an interval for every plotted group.

        For each entry of ``self.plot_data`` (and, when ``self.plot_hues``
        is set, for each level in ``self.hue_names`` within it) the
        ``estimator`` is applied to the non-null observations. When ``ci``
        is not None an interval is stored as well: ``"sd"`` gives the
        estimate plus/minus ``np.std``, ``"range"`` gives the min and max,
        and any other value is passed to ``utils.ci`` over ``n_boot``
        bootstrap resamples (resampled by ``self.plot_units`` if present).

        The results are stored on ``self.statistic`` and ``self.confint``
        as arrays; nothing is returned.
        """

        def point_estimate(values):
            # An empty vector has no estimate; record a NaN placeholder.
            return estimator(values) if values.size else np.nan

        def interval(values, units):
            # Fewer than two observations cannot support an interval.
            if values.size < 2:
                return [np.nan, np.nan]
            if ci == "sd":
                center = estimator(values)
                spread = np.std(values)
                return (center - spread, center + spread)
            if ci == "range":
                return (np.min(values), np.max(values))
            resamples = bootstrap(values,
                                  func=estimator,
                                  n_boot=n_boot,
                                  units=units)
            return utils.ci(resamples, ci)

        # Flat lists without hue nesting, one inner list per group with it.
        if self.hue_names is not None:
            statistic = [[] for _ in self.plot_data]
            confint = [[] for _ in self.plot_data]
        else:
            statistic, confint = [], []

        for idx, group_data in enumerate(self.plot_data):

            # Single layer of grouping: one estimate per group
            if self.plot_hues is None:
                if self.plot_units is not None:
                    units = self.plot_units[idx]
                    complete = pd.notnull(
                        np.c_[group_data, units]).all(axis=1)
                    values = group_data[complete]
                    units = units[complete]
                else:
                    values = remove_na(group_data)
                    units = None

                statistic.append(point_estimate(values))
                if ci is not None:
                    confint.append(interval(values, units))
                continue

            # Nested grouping: one estimate per hue level within the group
            for hue_level in self.hue_names:

                if not self.plot_hues[idx].size:
                    statistic[idx].append(np.nan)
                    if ci is not None:
                        confint[idx].append((np.nan, np.nan))
                    continue

                in_level = self.plot_hues[idx] == hue_level
                if self.plot_units is not None:
                    group_units = self.plot_units[idx]
                    complete = pd.notnull(
                        np.c_[group_data, group_units]).all(axis=1)
                    keep = in_level & complete
                    values = group_data[keep]
                    units = group_units[keep]
                else:
                    values = remove_na(group_data[in_level])
                    units = None

                statistic[idx].append(point_estimate(values))
                if ci is not None:
                    confint[idx].append(interval(values, units))

        # Save the resulting values for plotting
        self.statistic = np.array(statistic)
        self.confint = np.array(confint)
Esempio n. 21
0
for axs, attributes, titles in zip(axzs, attributes_all, titles_all):

    for axis, attribute, title in zip(axs, attributes, titles):
        N = 6
        men = [
            df[df.hate == "hateful"], df[df.hate == "normal"],
            df[df.hate_neigh], df[df.normal_neigh], df[df.is_63_2 == True],
            df[df.is_63_2 == False]
        ]
        tmp = []
        medians, medians_ci = [], []
        averages, averages_ci = [], []

        for category in men:
            boots = bootstrap(category[attribute],
                              func=np.nanmean,
                              n_boot=1000)
            ci_tmp = ci(boots)
            average = (ci_tmp[0] + ci_tmp[1]) / 2
            ci_average = (ci_tmp[1] - ci_tmp[0]) / 2
            averages.append(average)
            averages_ci.append(ci_average)
            boots = bootstrap(category[attribute],
                              func=np.nanmedian,
                              n_boot=1000)
            ci_tmp = ci(boots)
            median = (ci_tmp[0] + ci_tmp[1]) / 2
            ci_median = (ci_tmp[1] - ci_tmp[0]) / 2
            medians.append(median)
            medians_ci.append(ci_median)