Python DescrStatsW.DescrStatsW Examples, statsmodels.stats.weightstats.DescrStatsW.DescrStatsW Python Examples

Example #1

0

Show file

File: test_weightstats.py Project: yarikoptic/pystatsmodels

    def test_weightstats_ddof_tests(self):
        """Check that one-sample ttest and confint results ignore user ddof.

        Three ``DescrStatsW`` instances are built from the same 2-d data and
        weights with ``ddof`` 0, 1 and 2; the ``ddof=0`` instance is the
        reference the other two are compared against.
        """
        data, wts = self.x1_2d, self.w1

        ref, alt1, alt2 = (DescrStatsW(data, weights=wts, ddof=k)
                           for k in (0, 1, 2))

        # check ttest independent of user ddof: first without an argument,
        # then with 0.5 as argument. np.r_ concatenates each result tuple
        # into one array so it can be compared in a single call.
        for args in ((), (0.5,)):
            expected = np.r_[ref.ttest_mean(*args)]
            assert_almost_equal(np.r_[alt1.ttest_mean(*args)], expected, 14)
            assert_almost_equal(np.r_[alt2.ttest_mean(*args)], expected, 14)

        # check confint independent of user ddof
        expected_ci = ref.tconfint_mean()
        assert_almost_equal(alt1.tconfint_mean(), expected_ci, 14)
        assert_almost_equal(alt2.tconfint_mean(), expected_ci, 14)

Example #2

0

Show file

def weighted_stat(stock_trading_df):
    """Summarise a group of trades as a volume-weighted price profile.

    Parameters
    ----------
    stock_trading_df : pandas.DataFrame
        Must provide ``price`` and ``turnover`` columns; ``volume`` is read
        as well when the frame has more than one row.

    Returns
    -------
    pandas.Series
        Indexed by ``price_var``, ``price_std``, ``price_mean``,
        ``price_min``, ``price_max``, ``no_of_txn`` and ``turnover``.
        For a single-row frame the variance and standard deviation are 0
        and the mean/min/max all equal that row's price.
    """
    labels = [
        'price_var', 'price_std', 'price_mean',
        'price_min', 'price_max', 'no_of_txn', 'turnover'
    ]
    prices = stock_trading_df['price']
    turnover = stock_trading_df['turnover']

    if stock_trading_df.shape[0] == 1:
        # Positional access: the only row's index label is not necessarily 0
        # (e.g. when the frame is a slice of a larger one), and a label
        # lookup with [0] would raise KeyError in that case.
        only_price = prices.iloc[0]
        return pd.Series(
            [0, 0, only_price, only_price, only_price, 1, turnover.iloc[0]],
            index=labels)

    # Build the volume-weighted statistics object once and reuse it.
    weighted = DescrStatsW(prices, stock_trading_df['volume'])
    return pd.Series(
        [
            weighted.var,
            weighted.std,
            weighted.mean,
            min(prices),
            max(prices),
            # unweighted instance, as in the original transaction count
            DescrStatsW(prices).nobs,
            sum(turnover),
        ],
        index=labels)

Example #3

0

Show file

File: test_weightstats.py Project: yarikoptic/pystatsmodels

def test_ztest_ztost():
    """Cross-check the weighted z-test and TOST against proportion tests."""
    # the proportion functions are tested separately and act as reference
    import statsmodels.stats.proportion as smprop

    values, counts = [0, 1], [5, 15]

    prop_z = smprop.proportions_ztest(15, 20., value=0.5)
    stats_a = DescrStatsW(values, counts)
    # loose tolerance for the raw counts as weights
    assert_allclose(stats_a.ztest_mean(0.5), prop_z, rtol=0.03, atol=0.003)

    # with the weights rescaled by 21/20 the results agree to 12 decimals
    stats_scaled = DescrStatsW(values, np.array(counts) * 21. / 20)
    assert_almost_equal(stats_scaled.ztest_mean(0.5), prop_z, decimal=12)

    tost = stats_scaled.ztost_mean(0.4, 0.6)
    prop_tost = smprop.proportions_ztost(15, 20., 0.4, 0.6)
    assert_almost_equal(tost[0], prop_tost[0], decimal=12)

    # two-sample comparison: second sample with equal weights
    stats_b = DescrStatsW([0, 1], [10, 10])
    z_two = ztest(stats_a.asrepeats(), stats_b.asrepeats())
    chi2 = smprop.proportions_chisquare(np.asarray([15, 10]),
                                        np.asarray([20., 20]))
    #TODO: check this is this difference expected?, see test_proportion
    assert_allclose(z_two[1], chi2[1], rtol=0.03)

    z_cm = CompareMeans(stats_a, stats_b).ztest_ind()
    assert_allclose(z_cm[1], chi2[1], rtol=0.03)
    assert_almost_equal(z_cm, z_two, decimal=12)

Example #4

0

Show file

File: test_weightstats.py Project: yarikoptic/pystatsmodels

 def setup_class(cls):
     """Store two fixed samples, their DescrStatsW wrappers and a CompareMeans."""
     sample_a = np.array(
         [7.8, 6.6, 6.5, 7.4, 7.3, 7., 6.4, 7.1, 6.7, 7.6, 6.8])
     sample_b = np.array([4.5, 5.4, 6.1, 6.1, 5.4, 5., 4.1, 5.5])
     cls.x1, cls.x2 = sample_a, sample_b
     cls.d1 = DescrStatsW(sample_a)
     cls.d2 = DescrStatsW(sample_b)
     cls.cm = CompareMeans(cls.d1, cls.d2)

Example #5

0

Show file

File: test_weightstats.py Project: yarikoptic/pystatsmodels

    def test_weightstats_2(self):
        """Compare weighted statistics and t-tests with plain numpy/scipy
        results computed on the data returned by ``asrepeats()``."""
        x1, w1 = self.x1, self.w1
        x2, w2 = self.x2, self.w2

        unweighted1 = DescrStatsW(x1)
        weighted1 = DescrStatsW(x1, weights=w1)
        weighted2 = DescrStatsW(x2, weights=w2)
        rep1 = weighted1.asrepeats()
        rep2 = weighted2.asrepeats()

        # two-sample t-test: weighted version against scipy on rep1/rep2
        weighted_tt = ttest_ind(x1, x2, weights=(w1, w2))[:2]
        assert_almost_equal(weighted_tt, stats.ttest_ind(rep1, rep2), 14)
        # Row-count checks against sum_weights were disabled by the original
        # author: "not the same as new version with random
        # weights/replication".

        # moments of the second sample
        assert_almost_equal(rep2.mean(0), weighted2.mean, 14)
        assert_almost_equal(rep2.var(), weighted2.var, 14)
        assert_almost_equal(rep2.std(), weighted2.std, 14)
        # note: the following is for 1d
        assert_almost_equal(np.cov(rep2, bias=1), weighted2.cov, 14)
        # TODO: the corrcoef comparison is disabled, exception in corrcoef
        # (scalar case)

        # one-sample tests, unweighted and weighted
        for described, sample in ((unweighted1, x1), (weighted1, rep1)):
            assert_almost_equal(
                described.ttest_mean(3)[:2], stats.ttest_1samp(sample, 3), 11)

Example #6

0

Show file

File: test_weightstats.py Project: yarikoptic/pystatsmodels

    def test_weightstats_3(self):
        """2-d variant: weighted statistics and t-tests must agree with
        numpy/scipy results on the data returned by ``asrepeats()``."""
        w1, w2 = self.w1, self.w2

        weighted1 = DescrStatsW(self.x1_2d, weights=w1)
        weighted2 = DescrStatsW(self.x2_2d, weights=w2)
        rep1 = weighted1.asrepeats()
        rep2 = weighted2.asrepeats()

        # column-wise moments, covariance and correlation of sample 2
        assert_almost_equal(rep2.mean(0), weighted2.mean, 14)
        assert_almost_equal(rep2.var(0), weighted2.var, 14)
        assert_almost_equal(rep2.std(0), weighted2.std, 14)
        assert_almost_equal(np.cov(rep2.T, bias=1), weighted2.cov, 14)
        assert_almost_equal(np.corrcoef(rep2.T), weighted2.corrcoef, 14)

        # one-sample test; the third returned value is not compared
        tstat, pvalue, _ = weighted1.ttest_mean(3)
        assert_almost_equal([tstat, pvalue], stats.ttest_1samp(rep1, 3), 11)

        # two-sample test through CompareMeans
        from_compare = CompareMeans(weighted1, weighted2).ttest_ind()
        from_scipy = stats.ttest_ind(rep1, rep2)
        assert_almost_equal(from_compare[:2], from_scipy, 14)

Example #7

0

Show file

    def fit(self):
        """Compute the doubly robust estimates from the fitted models.

        Reads the exposure, outcome, ``_ps_``, ``_pY1_`` and ``_pY0_`` columns
        of ``self.df``.

        Without a weight column this sets, for a continuous outcome,
        ``average_treatment_effect`` with its ``_se`` and ``_ci``; otherwise
        ``risk_difference`` (with ``_se`` and ``_ci``) and ``risk_ratio``.
        With a weight column only the point estimates are set.

        Raises
        ------
        ValueError
            If the exposure or the outcome model has not been specified.
        """
        if self._fit_exposure_ is False or self._fit_outcome_ is False:
            raise ValueError(
                'The exposure and outcome models must be specified before the doubly robust estimate can '
                'be generated')

        frame = self.df
        treated = frame[self.exposure] == 1
        y_obs = frame[self.outcome]
        ps = frame['_ps_']
        py_a1 = frame['_pY1_']
        py_a0 = frame['_pY0_']

        # Doubly robust estimator under all treated
        dr_a1 = np.where(treated, (y_obs / ps) - ((py_a1 * (1 - ps)) / ps),
                         py_a1)
        # Doubly robust estimator under all untreated
        dr_a0 = np.where(treated, py_a0,
                         (y_obs / (1 - ps) - ((py_a0 * ps) / (1 - ps))))

        # normal quantile used for the confidence limits
        zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1)

        if self._weight_ is not None:
            # weighted case: point estimates only
            wts = frame[self._weight_]
            dr_m1 = DescrStatsW(dr_a1, weights=wts).mean
            dr_m0 = DescrStatsW(dr_a0, weights=wts).mean
            if self._continuous_outcome:
                self.average_treatment_effect = dr_m1 - dr_m0
            else:
                self.risk_difference = dr_m1 - dr_m0
                self.risk_ratio = dr_m1 / dr_m0
            return

        # unweighted case: the difference estimate, its standard error and
        # interval are computed the same way for both outcome types
        mean_a1, mean_a0 = np.mean(dr_a1), np.mean(dr_a0)
        effect = mean_a1 - mean_a0
        var_ic = np.var((dr_a1 - dr_a0) - effect, ddof=1) / frame.shape[0]
        std_err = np.sqrt(var_ic)
        interval = [effect - zalpha * std_err, effect + zalpha * std_err]

        if self._continuous_outcome:
            self.average_treatment_effect = effect
            self.average_treatment_effect_se = std_err
            self.average_treatment_effect_ci = interval
        else:
            self.risk_difference = effect
            self.risk_ratio = mean_a1 / mean_a0
            self.risk_difference_se = std_err
            self.risk_difference_ci = interval

Example #8

0

Show file

File: Ball.py Project: svejlgaard/AppStat2020Project

    def get_angle(self, flip = True):
        """Combine the angle readings into per-side and overall estimates.

        Sets ``phi_r_mu``/``phi_r_std``, ``phi_l_mu``/``phi_l_std``, the
        ``dphi_*_g`` and ``dphi_*_a`` attributes and the combined
        ``dphi_mu``/``dphi_std``, then calls ``self.get_chi2`` on the two
        ``dphi`` estimates.

        NOTE(review): the ``flip`` argument is never read.
        """
        angles = get_angles()

        def mean_and_error(column):
            # every reading gets the same weight 1/err**2 with err = 1
            err = 1
            described = DescrStatsW(angles[[column]],
                                    weights = 1/(np.array([err, err, err]))**2)
            return described.mean, described.std / np.sqrt(3)

        angle_dict = dict()
        for side in ['r', 'l']:
            phi_mean, phi_mean_err = mean_and_error(f'inc_{side}')
            flip_mean, flip_mean_err = mean_and_error(f'inc_flip_{side}')

            # average of the normal and flipped readings
            true_phi = (phi_mean + flip_mean) / 2
            true_phi_err = np.sqrt(phi_mean_err**2 + flip_mean_err**2) / 2

            angle_dict[f'phi_{side}'] = [true_phi, true_phi_err]

        self.phi_r_mu, self.phi_r_std = angle_dict['phi_r']
        self.phi_l_mu, self.phi_l_std = angle_dict['phi_l']

        # estimate from the right/left angle difference
        self.dphi_mu_g = (self.phi_r_mu - self.phi_l_mu) / 2
        self.dphi_std_g = np.sqrt(self.phi_r_std**2 + self.phi_l_std**2)
        self.dphi_mu_err_g = self.dphi_std_g / np.sqrt(2) + 0.05

        # estimate from the a_L / a_R values and the angle of this orientation
        L = self.a_dict['a_L'][0]
        R = self.a_dict['a_R'][0]
        phi_sel, phi_sel_err = angle_dict[f'phi_{self.orientation.lower()}']
        x = np.radians(phi_sel)

        upper = (L - R) * np.sin(x)
        lower = (L + R) * np.cos(x)
        self.dphi_mu_a = upper / lower

        err1 = (np.sin(x)/((L+R)*np.cos(x))-(L-R)*np.sin(x)/((L+R)**2*np.cos(x)))**2 * self.phi_l_std**2
        err2 = (-np.sin(x)/((L+R)*np.cos(x))-(L-R)*np.sin(x)/((L+R)**2*np.cos(x)))**2 * self.phi_r_std**2
        err3 = ((L-R)/(L+R)+(L-R)*np.sin(x)**2/((L+R)*np.cos(x)**2))**2 * phi_sel_err**2

        self.dphi_std_a = np.sqrt(err1 + err2 + err3) + 0.05

        # inverse-variance weighted combination of the two estimates
        combined_dphi = DescrStatsW([self.dphi_mu_g, self.dphi_mu_a], weights=1/(np.array([self.dphi_mu_err_g, self.dphi_std_a]))**2)

        self.dphi_mu = combined_dphi.mean
        self.dphi_std = combined_dphi.std / np.sqrt(2)

        self.get_chi2(f'dphi_{self.orientation}', np.array([self.dphi_mu_a,self.dphi_mu_g]), np.array([self.dphi_std_a, self.dphi_mu_err_g]))

Example #9

0

Show file

def _similarity_helper_limited(user1_id, user2_id, solr):
    """Print the unweighted and the weighted correlation of two users.

    The vectors and weights come from ``get_vector_limited(user1_id,
    user2_id)``. Both correlations are printed, labelled with ``user2_id``;
    nothing is returned.

    ``solr`` is accepted for interface compatibility but is not read here.
    """
    user1_vector, user2_vector, weight_vector = get_vector_limited(
        user1_id, user2_id)

    data = column_stack((user1_vector, user2_vector))
    # A DescrStatsW instance cannot be indexed directly; the correlation
    # matrix is its ``corrcoef`` attribute (as used for the weighted case).
    result = DescrStatsW(data).corrcoef[1][0]
    result2 = DescrStatsW(data, weights=weight_vector).corrcoef[1][0]

    print('Pearson similarity ' + str(user2_id) + ' ' + str(result))
    print('DescrStatsW ' + str(user2_id) + ' ' + str(result2))

Example #10

0

Show file

def trades_to_bar(ticks: pd.DataFrame, bar_trigger: str='fixed') -> dict:
    """Aggregate a run of ticks into one bar of summary statistics.

    Parameters
    ----------
    ticks
        Tick data; anything that is not already a ``DataFrame`` is passed to
        ``pd.DataFrame``. The columns ``utc_dt``, ``volume``, ``price``,
        ``jma`` and ``side`` are read.
    bar_trigger
        Stored unchanged under the ``'bar_trigger'`` key.

    Returns
    -------
    dict
        Time span, tick/volume/dollar totals, open/close/low/high/range/
        return and volume-weighted quantiles, mean and std for both ``price``
        and ``jma``, plus tick/volume/dollar imbalance.
    """
    if not isinstance(ticks, pd.DataFrame):
        ticks = pd.DataFrame(ticks)

    bar = {'bar_trigger': bar_trigger}
    # time
    bar['open_at'] = ticks['utc_dt'].iloc[0]
    bar['close_at'] = ticks['utc_dt'].iloc[-1]
    bar['duration_td'] = bar['close_at'] - bar['open_at']
    # volume
    bar['tick_count'] = ticks.shape[0]
    bar['volume'] = ticks.volume.sum()
    bar['dollars'] = (ticks.volume * ticks.price).sum()

    # price first, then jma: both get the same set of keys, so they share
    # one code path (insertion order of the keys is unchanged)
    for col in ('price', 'jma'):
        series = ticks[col]
        bar[col + '_open'] = series.values[0]
        bar[col + '_close'] = series.values[-1]
        bar[col + '_low'] = series.min()
        bar[col + '_high'] = series.max()
        bar[col + '_range'] = bar[col + '_high'] - bar[col + '_low']
        # return over the bar is close minus open (price_return used to be
        # close minus close, i.e. always zero)
        bar[col + '_return'] = bar[col + '_close'] - bar[col + '_open']
        # volume weighted statistics
        dsw = DescrStatsW(data=series, weights=ticks.volume)
        qtiles = dsw.quantile(probs=[0.1, 0.5, 0.9]).values
        bar[col + '_wq10'] = qtiles[0]
        bar[col + '_wq50'] = qtiles[1]
        bar[col + '_wq90'] = qtiles[2]
        bar[col + '_wq_range'] = bar[col + '_wq90'] - bar[col + '_wq10']
        bar[col + '_wmean'] = dsw.mean
        bar[col + '_wstd'] = dsw.std

    # tick/vol/dollar/imbalance
    bar['tick_imbalance'] = ticks.side.sum()
    bar['volume_imbalance'] = (ticks.volume * ticks.side).sum()
    bar['dollar_imbalance'] = (ticks.volume * ticks.price * ticks.side).sum()

    return bar

Example #11

0

Show file

File: find_missing_doi.py Project: eliselepage/DBProcessingScripts

def title_len_stat(mongo_db):
    """Tally and report title lengths of documents that have a DOI.

    For every collection named in ``PAPER_COLLECTIONS`` the documents with a
    ``doi`` field are scanned. Two counters map title length to number of
    titles: one for ``metadata.title`` strings and one for
    ``crossref_raw_result.title`` lists holding exactly one entry. Weighted
    statistics are printed for both counters; a bar plot is drawn for the
    first one only.

    Returns
    -------
    tuple of collections.Counter
        ``(len_counter_db, len_counter_cr)``.
    """
    len_counter_db = collections.Counter()
    len_counter_cr = collections.Counter()
    for col_name in mongo_db.collection_names():
        if col_name not in PAPER_COLLECTIONS:
            continue
        col = mongo_db[col_name]
        query_w_doi = col.find({'doi': {'$exists': True}})
        for doc in query_w_doi:
            if ('metadata' in doc and 'title' in doc['metadata']
                    and isinstance(doc['metadata']['title'], str)):
                len_counter_db[len(doc['metadata']['title'])] += 1
            if ('crossref_raw_result' in doc
                    and 'title' in doc['crossref_raw_result']
                    and isinstance(doc['crossref_raw_result']['title'], list)
                    and len(doc['crossref_raw_result']['title']) == 1):
                len_counter_cr[len(
                    doc['crossref_raw_result']['title'][0])] += 1

    # stat for db titles
    _report_len_counter('len_counter_db', len_counter_db, plot=True)
    # stat for cr titles
    _report_len_counter('len_counter_cr', len_counter_cr)

    return len_counter_db, len_counter_cr


def _report_len_counter(label, len_counter, plot=False):
    """Print count-weighted mean, std and quantiles of the tallied lengths."""
    sorted_len = sorted(len_counter.keys())
    weights = [len_counter[length] for length in sorted_len]
    weighted_stats = DescrStatsW(sorted_len, weights=weights)
    if plot:
        # x/y passed by keyword: newer seaborn releases no longer accept
        # them positionally
        sns.barplot(x=sorted_len, y=weights)
    percentile = weighted_stats.quantile(probs=[
        0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.25, 0.5, 0.75, 0.95, 0.97, 0.99
    ])
    print(label)
    pprint(len_counter)
    print('weighted_stats.mean', weighted_stats.mean)
    print('weighted_stats.std', weighted_stats.std)
    print('percentile')
    print(percentile)

Example #12

0

Show file

    def angular_linear_correlation(self,
                                   angles,
                                   data,
                                   weights=None,
                                   double_peak=False):
        """
        Compute a (weighted) angular-linear correlation.

        Set double_peak to True when the data are expected to peak at two
        symmetrical, opposite angles (a non-direction selective effect, e.g.
        horizontal vs. vertical rather than up vs. down); the angles are then
        passed through ``self.collapse_angles_symmetrically`` first.

        :param angles: input angles
        :type angles: 1-D array
        :param data: input data
        :type data: 1-D array, same shape as angles
        :param weights: weights to use for correlation; all ones when None
        :type weights: 1-D array, same shape as angles
        :param double_peak: whether to collapse the angles symmetrically
        :type double_peak: bool

        :return corr: circular correlation
        :type corr: float

        """
        if weights is None:
            # no weights given: every sample counts the same
            weights = np.ones_like(angles)

        if double_peak:
            # expected periodicity: rescale the angular data
            angles = self.collapse_angles_symmetrically(angles)

        sin_a, cos_a = np.sin(angles), np.cos(angles)

        def weighted_corr(first, second):
            # weighted correlation coefficient between two 1-D arrays
            pair = np.vstack([first, second]).T
            return DescrStatsW(data=pair, weights=weights).corrcoef[0, 1]

        # formula from the pycircstat package for circular correlation
        rxs = weighted_corr(data, sin_a)
        rxc = weighted_corr(data, cos_a)
        rcs = weighted_corr(sin_a, cos_a)

        # angular-linear correlation (equ. 27.47)
        return np.sqrt((rxc**2 + rxs**2 - 2 * rxc * rxs * rcs) / (1 - rcs**2))

Example #13

0

Show file

File: test_weightstats.py Project: yarikoptic/pystatsmodels

    def setup_class(self):
        """Create seeded random samples, integer weights and DescrStatsW
        fixtures (``ddof=0`` for sample 1, ``ddof=1`` for sample 2)."""
        np.random.seed(9876789)
        size1, size2 = 20, 30
        loc1, loc2 = 1, 1.2
        # the draw order below fixes the generated values; do not reorder
        self.x1 = loc1 + np.random.randn(size1, 3)
        self.x2 = loc2 + np.random.randn(size2, 3)
        self.w1 = np.random.randint(1, 4, size1)
        self.w2 = np.random.randint(1, 4, size2)

        self.d1w = DescrStatsW(self.x1, weights=self.w1, ddof=0)
        self.d2w = DescrStatsW(self.x2, weights=self.w2, ddof=1)
        self.x1r = self.d1w.asrepeats()
        self.x2r = self.d2w.asrepeats()

Example #14

0

Show file

    def _get_weighted_stats(self, X, y, weights):
        """Return weighted means and standard deviations for X and y.

        The result is the tuple ``(mean_Xw, mean_yw, std_Xw, std_yw)``.

        NOTE(review): the ``X``, ``y`` and ``weights`` arguments are not
        read; the statistics are computed from ``self._X``, ``self._y`` and
        ``self._w`` — confirm this is intended.
        """
        def mean_and_std(values):
            described = DescrStatsW(values, weights=self._w, ddof=1)
            # abs() before the square root: original note "abs for w_sum <1"
            spread = np.sqrt(np.abs(described.var_ddof(1)))
            return described.mean, spread

        mean_Xw, std_Xw = mean_and_std(self._X)
        mean_yw, std_yw = mean_and_std(self._y)

        return mean_Xw, mean_yw, std_Xw, std_yw

Example #15

0

Show file

File: test_weightstats.py Project: GonzaloUlla/Bankruptcy-Prediction-using-Machine-Learning-Algorithms-in-Python

    def setup_class(cls):
        """Create seeded random samples, integer weights and DescrStatsW
        fixtures (both with ``ddof=1``) on the class."""
        np.random.seed(9876789)
        size1, size2 = 20, 20
        loc1, loc2 = 1, 1.2
        # the draw order below fixes the generated values; do not reorder
        cls.x1 = loc1 + np.random.randn(size1, 3)
        cls.x2 = loc2 + np.random.randn(size2, 3)
        cls.w1 = np.random.randint(1, 4, size1)
        cls.w2 = np.random.randint(1, 4, size2)

        cls.d1w = DescrStatsW(cls.x1, weights=cls.w1, ddof=1)
        cls.d2w = DescrStatsW(cls.x2, weights=cls.w2, ddof=1)
        cls.x1r = cls.d1w.asrepeats()
        cls.x2r = cls.d2w.asrepeats()

Example #16

0

Show file

File: global_coverage_report_digital_panel_4_CovReport.py Project: ubit-hnrg/NGStools

def globaldepth(coverage_hist):
    """Summarise a coverage histogram as a dict of global depth metrics.

    Adds a ``cumsum`` column (``1 - cumulative frequency``) to
    ``coverage_hist`` in place, then returns a dict with the keys
    ``bases_totales``, ``mean_DP``, ``median_DP``, ``bases_20X`` and
    ``dp>=20``. The mean is rounded to the module-level ``signif`` digits.
    """
    coverage_hist['cumsum'] = 1 - coverage_hist.frequency.cumsum()

    # Statistics of (DP - 1) weighted by the BPs column.
    # Open question from the original author: how does this differ from
    # coverage_hist.DP.mean()?
    depth_stats = DescrStatsW(coverage_hist.DP - 1,
                              weights=coverage_hist.BPs,
                              ddof=0)

    total_bases, bases_20x, depth_20X = depth_fraction(coverage_hist, thr=20)

    return {
        'bases_totales': int(total_bases),
        'mean_DP': round(depth_stats.mean, signif),
        'median_DP': depth_stats.quantile(0.5).values[0],
        'bases_20X': int(bases_20x),
        'dp>=20': round(depth_20X, 3),
    }

Example #17

0

Show file

    def fit_single(self,
                   pos_left,
                   pos_right,
                   weights,
                   tol=1e-4,
                   maxiter=4000,
                   verbose=False):
        """Fit a single component with Nelder-Mead.

        The start parameters are composed from the weighted mean and
        covariance of the interval midpoints ``(left + right) / 2``.

        Returns ``(mus, covs, ws, res.fun)`` where the first three come from
        decomposing the optimiser's solution. If the determinant of the
        first covariance is exactly zero it is passed through
        ``self._cov_process_`` after a warning is printed.
        """
        left = np.asarray(pos_left)
        right = np.asarray(pos_right)
        debugs = [] if verbose else None

        midpoint_stats = DescrStatsW((left + right) / 2.0,
                                     weights=np.array(weights))
        init_paras = self._paras_compose_([midpoint_stats.mean],
                                          [midpoint_stats.cov], [1.0])

        method = 'Nelder-Mead'
        solver_options = {'maxiter': maxiter, 'disp': verbose}
        res = opt.minimize(self._single_optpara,
                           init_paras,
                           args=(left, right, weights, debugs),
                           method=method,
                           tol=tol,
                           options=solver_options)
        if verbose:
            print("Method:{}; Initial parameter: {};".format(
                method, init_paras))
            print("Converged Parameter: {}".format(res.x))

        mus, covs, ws = self._paras_decompose_(res.x, 1)
        first_cov = covs[0]
        if det(first_cov) == 0.0:
            print("Warning: covariance processed:")
            print("\t pre-optimal mus: {}, cov: {}".format(mus[0], first_cov))
            covs[0] = self._cov_process_(first_cov)

        return mus, covs, ws, res.fun

Example #18

0

Show file

File: utils.py Project: zzzcy-coder/Barra

def Newey_West(ret, q=2, tao=252):
    '''
    Newey-West adjusted covariance estimate.

    Used when the series are correlated over time. Rows are weighted with
    exponentially decaying weights (the last row weighs most), centred on
    their weighted mean, and the lag-1..q cross-covariances are added to the
    lag-0 covariance with the factor ``1 - i / (1 + q)``.

    ret: DataFrame, rows are time periods, columns are factor returns
    q: the factor returns are assumed to follow an MA(q) process
    tao: half-life (in rows) of the exponential weights

    Returns a DataFrame (columns x columns) with the adjusted covariance.
    Raises ValueError if T <= q or T <= K (T rows, K columns).
    '''
    T = ret.shape[0]  # length of the time series
    K = ret.shape[1]  # number of factors
    if T <= q or T <= K:
        raise ValueError("T <= q or T <= K")

    names = ret.columns
    # exponentially decaying weights, normalised to sum to one
    weights = 0.5**(np.arange(T - 1, -1, -1) / tao)
    weights = weights / weights.sum()

    values = np.asarray(ret.values, dtype=float)
    centered = values - np.average(values, axis=0, weights=weights)
    weighted = weights[:, None] * centered

    # lag 0: sum_t w[t] * outer(r[t], r[t])
    V = weighted.T @ centered
    for i in range(1, q + 1):
        # lag i: sum_t w[i + t] * outer(r[t], r[i + t])
        Gammai = centered[:T - i].T @ weighted[i:]
        V = V + (1 - i / (1 + q)) * (Gammai + Gammai.T)

    return pd.DataFrame(V, columns=names, index=names)

Example #19

0

Show file

File: stats_helper.py Project: ejpjapan/TSMOM

def compute_summary_statistics(dbm: database_manager.DatabaseManager, tbl_name: str) -> Optional[Dict[str, Tuple]]:
    """
    Computes summary statistics for given table.

    The monthly returns come from ``finance_metrics.compute_monthly_returns``.
    The annualised figures scale the monthly mean by 12 and the monthly
    standard deviation by sqrt(12); the t-statistic and p-value come from a
    one-sided (``alternative='larger'``) ``ttest_mean``.

    :param dbm: A DatabaseManager instance.
    :param tbl_name: name of the table to compute monthly return for.
    :return: dictionary containing various statistics, or None when either
        the returns frame or the info record is missing.
    """
    df, info, start_date = finance_metrics.compute_monthly_returns(dbm, tbl_name)

    if df is None or info is None:
        return None

    monthly = df['Monthly_Return']
    # run the test once and reuse both the statistic and the p-value
    ttest = DescrStatsW(monthly.values).ttest_mean(alternative='larger')

    return {
        'table_name': tbl_name,
        'contract_name': info[1],
        'type': info[3],
        'subtype': info[4],
        'start-date': start_date,
        'ar': monthly.mean() * 12,
        'vol': monthly.std() * np.sqrt(12),
        't-stat': ttest[0],
        'p-value': ttest[1],
        'kurt': monthly.kurt(),
        'skew': monthly.skew(),
    }

Example #20

0

Show file

def calc_statistics(_totalVals, _distances, name, mask):
    """Distance-weighted mean and standard deviation of the masked values.

    Rows of ``_totalVals`` and entries of ``_distances`` are selected with
    ``mask``; every value in a row is weighted by that row's distance and
    NaN values are left out.

    Returns ``(name, mean, std)``. When ``_totalVals`` or ``mask`` is empty a
    message is printed and a tuple of ``name`` followed by NaNs is returned.

    Raises ValueError if the masked distances and values differ in length.
    """
    if len(_totalVals) == 0 or len(mask) == 0:
        # One pre-formatted argument prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("%s - No data available!\n" % name)
        # NOTE(review): this placeholder has 14 fields while the normal
        # return below has 3 — confirm which arity callers expect.
        return (name,) + (np.nan,) * 13

    totalVals = _totalVals[mask, :]
    distances = _distances[mask]

    if len(distances) != len(totalVals):
        raise ValueError("distances must be the same length as data values")

    # repeat each row's distance across that row's columns
    weights = np.transpose(np.array([distances] * totalVals.shape[1]))

    valid = ~np.isnan(totalVals)
    totalStats = DescrStatsW(totalVals[valid],
                             weights=weights[valid],
                             ddof=0)

    # Mean and SD for this region, weighted by segment distance.
    return (name, totalStats.mean, totalStats.std)

Example #21

0

Show file

File: water_mass_binning.py Project: sida-wang/ocean-analysis

def weighted_percentiles(data, weights, percentiles):
    """Return the weighted quantiles of ``data`` at the requested levels.

    Args:
      data (np.ndarray) : Bin variable (e.g. temperature, salinity)
      weights (np.ndarray): Weights (e.g. cell volume, area)
      percentiles (np.ndarray): Requested levels as fractions; every entry
        must lie between 0 and 1 (e.g. 0-1 by 0.01)

    """
    # the levels are fractions, not percent
    assert percentiles.max() <= 1.0
    assert percentiles.min() >= 0.0

    # An interpolation over a hand-built weighted CDF was tried by the
    # original author and did not give clean results.
    described = DescrStatsW(data=data, weights=weights)
    return described.quantile(probs=percentiles, return_pandas=False)

Example #22

0

Show file

File: bc_analysis_fields.py Project: AJueling/CESM

    def spatial_correlation(self,
                            field_A,
                            field_B,
                            method=None,
                            selection=None):
        """Area-weighted correlation coefficient of two 2D fields.

        Fields of different shape are first passed through
        ``self.regrid_to_lower_resolution``. An ``int`` selection picks the
        mask number; a ``dict`` selection is applied with ``.sel`` to the
        mask, the area and both fields. Points that are NaN in either field
        or zero in the mask are excluded.

        NOTE(review): ``method`` is not read.
        """
        if np.shape(field_A) == np.shape(field_B):
            A, B = field_A, field_B
        else:  # have to regrid
            A, B = self.regrid_to_lower_resolution(field_A, field_B)
        assert np.shape(A) == np.shape(B)
        domain = self.determine_domain(A)

        AREA = xr_AREA(domain)
        MASK = boolean_mask(domain=domain, mask_nr=0)
        if type(selection) is int:
            MASK = boolean_mask(domain=domain, mask_nr=selection)
        elif type(selection) is dict:
            MASK, AREA = MASK.sel(selection), AREA.sel(selection)
            A, B = A.sel(selection), B.sel(selection)

        # True wherever a point must be dropped
        D = np.any(np.array(
            [np.isnan(A).values,
             np.isnan(B).values, (MASK == 0).values]),
                   axis=0)

        def flatten_valid(field):
            # blank the dropped points, flatten lat/lon and remove the blanks
            blanked = xr.where(D, np.nan, field)
            return blanked.stack(z=('latitude', 'longitude')).dropna(dim='z')

        A = flatten_valid(A)
        B = flatten_valid(B)
        C = flatten_valid(AREA)

        d = DescrStatsW(np.array([A.values, B.values]).T, weights=C)
        return d.corrcoef[0, 1]

Example #23

0

Show file

File: rooting_methods_general.py Project: adamhockenberry/tree-rooting

def branch_scan_MinVar_general(modifier, ds_dists, us_dists, all_weights):
    """
    Objective function for optimally placing the root on a candidate branch.
    Only valid for weighting schemes where the weights do not change when
    the root is moved.

    Input/s:
    modifier - The parameter being optimized: how far to shift the root along
                the branch so as to minimize the root-to-tip variance
    ds_dists - array of downstream root-to-tip distances
    us_dists - array of upstream root-to-tip distances
    all_weights - array of downstream and upstream terminal weights

    Output/s:
    weighted variance of the shifted root-to-tip distances

    """
    # The shift is added to the downstream distances and subtracted from the
    # upstream ones; downstream values come first in the combined array.
    shifted = np.concatenate((ds_dists + modifier, us_dists - modifier))
    return DescrStatsW(shifted, all_weights).var

Example #24

0

Show file

def plot_fill_between(data, data_dir, label, n_dis, color='magenta'):
    """Plot per-iteration means with a confidence band, save the figure and show it.

    For every ``(key, value)`` pair in `data` the mean of ``value`` and the
    interval returned by ``DescrStatsW(value).tconfint_mean()`` (default
    arguments) are computed; ``key + 1`` is used as the x coordinate.

    Parameters:
        data: mapping from a numeric key to a collection of samples.
        data_dir: directory part of the output file name.
        label: y-axis label, also embedded in the output file name.
        n_dis: value embedded in the output file name.
        color: colour of the mean line (the band is always 'violet').

    Returns:
        The list of per-key means, in iteration order of `data`.
    """
    plt.figure(figsize=(10, 8))

    x_values, means, band_low, band_high = [], [], [], []
    for step, samples in data.items():
        ci_low, ci_high = DescrStatsW(samples).tconfint_mean()
        band_low.append(ci_low)
        band_high.append(ci_high)
        x_values.append(step + 1)
        means.append(np.mean(samples))

    plt.fill_between(x_values, band_low, band_high, color='violet')
    plt.plot(x_values, means, color=color, lw=5)

    font = 18
    plt.xlabel('iteration', fontsize=font)
    plt.ylabel(label, fontsize=font)
    plt.xlim([1, len(x_values)])
    plt.ylim([min(means) - 0.05, max(means) + 0.05])

    # Drop the first automatically chosen tick and add an explicit tick at 1.
    auto_ticks = plt.xticks()[0]
    plt.xticks(list(auto_ticks[1:]) + [1])
    plt.tick_params(axis='both', which='major', labelsize=font)
    plt.legend(['TestCV'], fontsize=font, loc=2)

    out_path = '{}/{}:{}_ARTM_smart.eps'.format(data_dir, label, n_dis)
    plt.savefig(out_path)
    plt.show()
    return means

Example #25

0

Show file

File: Ball.py Project: svejlgaard/AppStat2020Project

 def get_diameter(self):
     """Summarise the diameter measurements and print the weighted mean with its error.

     Runs ``self.get_chi2`` on the diameter values with a fixed uncertainty
     of 0.05 per measurement, then stores on the instance:

     * ``diameter_mean``     - weighted mean (weights are 1 / uncertainty**2)
     * ``diameter_mean_err`` - weighted std divided by sqrt(number of measurements)
     * ``diameter_std``      - weighted std

     The printed mean and error are multiplied by 1000 and shown with two decimals.
     """
     # Fixed per-measurement uncertainty; three entries, so this assumes
     # exactly three diameter measurements -- TODO confirm.
     uncertainties = [0.05, 0.05, 0.05]

     self.get_chi2('Diameter', self.diameter.values, np.array(uncertainties))

     inverse_variance = 1 / np.array(uncertainties) ** 2
     stats = DescrStatsW(self.diameter, weights=inverse_variance)
     n_measurements = len(self.diameter)

     self.diameter_mean = stats.mean
     self.diameter_mean_err = stats.std / np.sqrt(n_measurements)
     self.diameter_std = stats.std
     print(f' The diameter of the {self.ball} is {1000*self.diameter_mean:.2f} +- {1000*self.diameter_mean_err:.2f}')

Example #26

0

Show file

File: MLutils.py Project: littlewine/snorkel-ml

def get_lower_upper_CI(scores):
    """Return the distance from each row mean to the lower confidence bound.

    `scores` is indexed as a 2-D array: each row is treated as one series and
    its columns as repeated observations. With more than one column, the
    interval from ``DescrStatsW(scores.T).tconfint_mean()`` (default
    arguments) is centred on the row means and the absolute lower offset is
    returned. With one column (or none) the result is 0.

    Despite the name, only the lower offset is returned; the upper one is
    computed and discarded.
    """
    if scores.shape[1] <= 1:
        # Not enough observations per row to build an interval.
        return 0

    row_means = scores.mean(axis=1)
    offsets = DescrStatsW(scores.T).tconfint_mean() - row_means
    lower_offset, _upper_offset = offsets
    return abs(lower_offset)

Example #27

0

Show file

File: global_coverage_report_inLibrary.py Project: ubit-hnrg/NGStools

def globaldepth(coverage_hist):
    """Summarise a coverage histogram into a dict of global depth statistics.

    `coverage_hist` is accessed through its ``frequency``, ``DP`` and ``BPs``
    columns. A ``'cumsum'`` column (1 - cumulative frequency) is written onto
    the passed frame as a side effect; it is not read here directly --
    presumably ``depth_fraction`` relies on it (TODO confirm).

    Returns a dict with, in this order:
        mean_DP, median_DP, std_DP, q25_DP, q75_DP, q95_DP  - statistics of
            ``DP - 1`` weighted by ``BPs`` (ddof=0); mean and std are rounded
            to ``signif`` digits, quantiles are returned unrounded.
        dp>=1, dp>=10, dp>=20, dp>=30, dp>=50, dp>=100      - results of
            ``depth_fraction`` at each threshold, rounded to ``signif`` digits.
    """
    coverage_hist['cumsum'] = 1 - coverage_hist.frequency.cumsum()
    # NOTE(review): depths are shifted by one before weighting -- presumably
    # the DP column is 1-based; confirm against the histogram producer.
    weighted_stats = DescrStatsW(coverage_hist.DP - 1,
                                 weights=coverage_hist.BPs,
                                 ddof=0)

    def _weighted_quantile(prob):
        # quantile() returns a labelled container; keep only the scalar.
        return weighted_stats.quantile(prob).values[0]

    # Each statistic is computed exactly once (the original set 'q95_DP'
    # twice with the same value).
    global_depth = {
        'mean_DP': round(weighted_stats.mean, signif),
        'median_DP': _weighted_quantile(0.5),
        'std_DP': round(weighted_stats.std, signif),
        'q25_DP': _weighted_quantile(0.25),
        'q75_DP': _weighted_quantile(0.75),
        'q95_DP': _weighted_quantile(0.95),
    }

    for thr in (1, 10, 20, 30, 50, 100):
        global_depth['dp>={}'.format(thr)] = round(
            depth_fraction(coverage_hist, thr=thr), signif)
    return global_depth

Example #28

0

Show file

    def test_comparemeans_convenient_interface(self):
        """CompareMeans built from DescrStatsW pairs and via from_data must agree.

        Also smoke-tests ``summary`` for every combination of ``use_t`` and
        ``usevar``, checking only that a SimpleTable comes back.
        """
        from statsmodels.iolib.table import SimpleTable

        x1_2d = self.x1_2d
        x2_2d = self.x2_2d
        from_stats = CompareMeans(DescrStatsW(x1_2d), DescrStatsW(x2_2d))

        # smoke test: summary returns a table for all option combinations
        option_grid = [(use_t, usevar)
                       for use_t in (True, False)
                       for usevar in ('pooled', 'unequal')]
        for use_t, usevar in option_grid:
            table = from_stats.summary(use_t=use_t, usevar=usevar)
            assert_(isinstance(table, SimpleTable))

        # the from_data constructor must render the same default summary
        from_arrays = CompareMeans.from_data(x1_2d, x2_2d)
        assert_(str(from_stats.summary()) == str(from_arrays.summary()))

Example #29

0

Show file

def localdepth(coverage_hist):
    """Summarise a coverage histogram into a pandas Series of local depth statistics.

    `coverage_hist` is accessed through its ``frequency``, ``DP`` and ``BPs``
    columns. A ``'cumsum'`` column (1 - cumulative frequency) is written onto
    the passed frame as a side effect.

    The returned Series holds, in this order:
        mean_DP, std_DP - mean and std of ``DP - 1`` weighted by ``BPs``
            (ddof=0), rounded to ``signif`` digits.
        dp>=1, dp>=5, dp>=10, dp>=20, dp>=30 - ``depth_fraction`` at each
            threshold, rounded to ``signif`` digits and then multiplied by 100.

    No median and no dp>=50 / dp>=100 entries are reported here.
    """
    coverage_hist['cumsum'] = 1 - coverage_hist.frequency.cumsum()
    depth_stats = DescrStatsW(coverage_hist.DP - 1,
                              weights=coverage_hist.BPs,
                              ddof=0)

    local_depth = {
        'mean_DP': round(depth_stats.mean, signif),
        'std_DP': round(depth_stats.std, signif),
    }

    for thr in (1, 5, 10, 20, 30):
        # Round first, then scale: the order matters for the stored value.
        rounded_fraction = round(depth_fraction(coverage_hist, thr=thr), signif)
        local_depth['dp>={}'.format(thr)] = rounded_fraction * 100

    return pd.Series(local_depth)

Example #30

0

Show file

def _similarity_helper_2(user1_id, user2_id, user1_vector, solr):
    # Computes a weighted correlation between the vectors of two users and
    # writes it, together with their mutual friends, as a 'score' document
    # through the `solr` object.
    #
    # user1_id, user2_id - identifiers interpolated into the query and stored
    #                      in the document's 'users' field
    # user1_vector       - vector for user 1, supplied by the caller
    # solr               - object exposing delete/add/commit; presumably a
    #                      Solr client -- TODO confirm
    #
    # NOTE(review): only the statements visible in this excerpt are described.

    # Issue a delete for 'score' documents that mention both users --
    # presumably to replace a previously stored score.
    query = 'doc_type:score AND users:({} AND {})'.format(user1_id, user2_id)
    solr.delete(q=query)

    # get_vector_tf_idf returns two values: user 2's vector and a second
    # vector that is used below as the observation weights.
    user2_vector, depth_vector = get_vector_tf_idf(user2_id)
    # One column per user, so corrcoef is a 2x2 matrix.
    data = column_stack((user1_vector, user2_vector))

    # result = DescrStatsW(data).corrcoef[1][0]
    # Off-diagonal element: weighted correlation between the two columns.
    result2 = DescrStatsW(data, weights=depth_vector).corrcoef[1][0]

    mutual_friends = get_mutual_friends(user1_id, user2_id, solr)

    # Document handed to solr.add (as a one-element list).
    new_score = [{
        'doc_type': 'score',
        'users': [user1_id, user2_id],
        'similarity': result2,
        'mutual_friends': mutual_friends,
        'friends_count': len(mutual_friends)
    }]

    solr.add(new_score)
    solr.commit()

    # print('Pearson similarity ' + str(user2_id) + ' ' + str(result))
    print('DescrStatsW ' + str(user2_id) + ' ' + str(result2))