Python Series.stdの例、pandas.Series.std Pythonの例

コード例 #1

0

ファイルを表示

 def test_nanstd_roundoff(self):
     # Regression test for GH 10242 (test data taken from GH 10489). Ensure
     # that variance is stable.
     data = Series(766897346 * np.ones(10))
     for ddof in range(3):
         result = data.std(ddof=ddof)
         assert result == 0.0

コード例 #2

0

ファイルを表示

ファイル: text_model.py プロジェクト: cgoldammer/simple_text_analysis

def aggregate_features(text):
    """Collect simple surface statistics and sentiment values for ``text``.

    :param text: string to analyse
    :return: ``collections.OrderedDict`` holding, in this order: counts of
        ``?``, ``!`` and ``,``; the number of sentences and words reported by
        ``TextBlob``; the standard deviation of sentence lengths (0 when
        there is at most one sentence); the mean of the "word is in
        ``cachedStopWords``" flags; the number of matches of a fixed list of
        certainty patterns divided by the word count; and finally every field
        of ``TextBlob(text).sentiment``.
    """
    features = collections.OrderedDict()
    for key, mark in (('questions', '?'), ('exclamation', '!'), ('commas', ',')):
        features[key] = text.count(mark)

    blob = TextBlob(text)
    sentences = blob.sentences
    sentence_count = len(sentences)
    word_count = len(blob.words)
    features['number of sentences'] = sentence_count
    features['number of words'] = word_count

    sentence_lengths = Series([len(sentence) for sentence in sentences])
    if sentence_count > 1:
        features['length_std'] = sentence_lengths.std()
    else:
        features['length_std'] = 0

    stopword_flags = [(word in cachedStopWords) for word in blob.words]
    features['share stopwords'] = Series(stopword_flags).mean()

    # Each entry is used as a regular-expression pattern against the raw text.
    patterns = ['sure', 'certain', 'undoubt', 'clear', 'absolute']
    hits = Series([len(re.findall(pattern, text)) for pattern in patterns]).sum()
    features['certain_share'] = hits / word_count

    features.update(TextBlob(text).sentiment._asdict())
    return features

コード例 #3

0

ファイルを表示

ファイル: test_nanops.py プロジェクト: ivannz/pandas

 def test_nanstd_roundoff(self):
     # Regression test for GH 10242 (test data taken from GH 10489). Ensure
     # that variance is stable.
     data = Series(766897346 * np.ones(10))
     for ddof in range(3):
         result = data.std(ddof=ddof)
         self.assertEqual(result, 0.0)

コード例 #4

0

ファイルを表示

ファイル: significance.py プロジェクト: pavlin-policar/orange3-prototypes

def _check_Xy(X: pd.DataFrame,
              y: pd.Series, *,
              norm_y=False) -> Tuple[pd.Series, pd.Series]:
    if np.ndim(X) == 1:
        X = pd.Series(X).to_frame()
    elif np.ndim(X) == 2:
        X = pd.DataFrame(X)

    assert X.ndim == 2
    assert np.ndim(y) == 1
    assert len(X) == len(y)

    valid = ~X.isnull().any(1).values
    X = pd.Series(list(zip(*X.values[valid].T)),
                  name=tuple(X.columns)).astype('category')
    y = pd.Series(y).reset_index(drop=True)[valid]

    if is_object_dtype(y):
        y = pd.Categorical(y)

    if norm_y:
        assert is_numeric_dtype(y)
        y = (y - y.mean()) / y.std()

    return X, y

コード例 #5

0

ファイルを表示

ファイル: plot.py プロジェクト: mesosxzan/IBATS_Common

def plot_norm(data: pd.Series, bins=10, ax=None, is_show_plot=None):
    """
    Plot the normal-distribution curve fitted to the given data.

    :param data: values whose mean and standard deviation define the curve
    :param bins: number of histogram bins
    :param ax: axes to draw on; if None, a new figure and axes are created
    :param is_show_plot: whether to call ``plt.show()``; when left as None it
        defaults to True if the axes were created here and False otherwise
    :return: n, bins_v, mean, std (histogram counts, bin edges, and the mean
        and standard deviation of ``data``)
    """
    if ax is None:
        fig, ax = plt.subplots()
        if is_show_plot is None:
            is_show_plot = True

    if is_show_plot is None:
        is_show_plot = False

    n, bins_v = np.histogram(data, bins=bins)

    mu = data.mean()  # mean of distribution
    sigma = data.std()  # standard deviation of distribution
    # Normal pdf evaluated at the bin edges.
    y = stats.norm.pdf(bins_v, loc=mu, scale=sigma)
    ax.plot(bins_v, y, '--')
    # Enable the grid on the axes actually drawn on; ``plt.grid`` would act
    # on whatever axes pyplot currently considers active.
    ax.grid(True)
    if is_show_plot:
        plt.show()

    return n, bins_v, mu, sigma

コード例 #6

0

ファイルを表示

ファイル: _transformer_1d.py プロジェクト: zhangbk920209/adtk

    def _predict_core(self, s: pd.Series) -> pd.Series:
        mean = s.mean()
        std = s.std()

        if std == 0:
            std = 1

        return (s - mean) / std

コード例 #7

0

ファイルを表示

    def __init__(self, col: Series):
        col: ndarray = col.to_numpy()

        self._min: number = col.min(initial=None)
        self._max: number = col.max(initial=None)
        self._range: number = self._max - self._min
        self._mean: number = col.mean()
        self._std: number = col.std()

コード例 #8

0

ファイルを表示

def kurtosis(returns: pd.Series):
    """
    Computes the kurtosis of a return series: the mean fourth power of the
    deviations from the mean, divided by the fourth power of the standard
    deviation computed with ``ddof=0``.
    """
    centered = returns - returns.mean()
    fourth_moment = (centered ** 4).mean()
    population_std = returns.std(ddof=0)
    return fourth_moment / population_std ** 4

コード例 #9

0

ファイルを表示

ファイル: mayi.py プロジェクト: youshaox/ATEC_Payment_Risk_Recognition_Competition

 def fill_outliers(col: pd.Series):
     """Clip values lying more than three standard deviations from the mean.

     Values above ``mean + 3 * std`` are replaced by the floor of that bound
     and values below ``mean - 3 * std`` by the floor of the lower bound.
     ``col`` is modified in place; its underlying values are returned.
     """
     center = col.mean()
     spread = col.std()
     upper = center + 3 * spread
     lower = center - 3 * spread

     too_high = col > upper
     col[too_high] = np.floor(upper)
     # Evaluated after the upper clipping, as the replaced values are part
     # of the series by now.
     too_low = col < lower
     col[too_low] = np.floor(lower)
     return col.values

コード例 #10

0

ファイルを表示

ファイル: metrics.py プロジェクト: zhangxinaaaa/algorithmic-trading-with-python

def calculate_annualized_volatility(return_series: pd.Series) -> float:
    """
    Calculates annualized volatility for a date-indexed return series.

    The standard deviation of the series is scaled by the square root of the
    average number of entries per year, where the number of years comes from
    ``get_years_past``.
    """
    entry_count = return_series.shape[0]
    entries_per_year = entry_count / get_years_past(return_series)
    return return_series.std() * np.sqrt(entries_per_year)

コード例 #11

0

ファイルを表示

        def _mask_outliers(vec: pd.Series, stdv_times):
            vec_mean = vec.mean()
            vec_stdv = vec.std()
            upper = vec_mean + vec_stdv * stdv_times
            lower = vec_mean - vec_stdv * stdv_times
            vec[((lower > vec) | (vec > upper))] = np.nan

            return vec

コード例 #12

0

ファイルを表示

 def before_after_3sigma(data: pd.Series) -> pd.Series:
     """Clamp values at or beyond three standard deviations from the mean.

     Values greater than or equal to ``mean + 3 * std`` are set to that
     bound; values less than or equal to ``mean - 3 * std`` are set to the
     lower bound. ``data`` is modified in place and returned.
     """
     center = data.mean()
     spread = data.std()
     upper = center + 3 * spread
     lower = center - 3 * spread
     data[data >= upper] = upper
     data[data <= lower] = lower
     return data

コード例 #13

0

ファイルを表示

ファイル: NumericColumn.py プロジェクト: THUyansh/LSRL

 def from_series(feature_name: str, series: Series):
     """Build a ``NumericColumn`` from a pandas Series.

     The column receives the given feature name plus the minimum, maximum,
     mean and standard deviation of ``series``. An ``AssertionError``
     carrying the offending dtype is raised for a non-numeric series.
     """
     assert types.is_numeric_dtype(series), series.dtypes
     summary = dict(
         min_value=series.min(),
         max_value=series.max(),
         mean_value=series.mean(),
         std_value=series.std(),
     )
     return NumericColumn(feature_name=feature_name, **summary)

コード例 #14

0

ファイルを表示

def sharpe_ratio(corrs: pd.Series) -> np.float32:
    """
    Calculate the Sharpe ratio for Numerai from per-era correlations.

    :param corrs: A Pandas Series containing the Spearman correlations for each era
    :return: The mean of ``corrs`` divided by its standard deviation.
    """
    average = corrs.mean()
    spread = corrs.std()
    return average / spread

コード例 #15

0

ファイルを表示

def fit_transform_normalize_y(y_train):
    """Box-Cox transform ``y_train`` without its outliers and whiten it.

    A first Box-Cox pass over the full data is used only to obtain the
    outlier labels from ``tukey_outlier_test``. Those labels are dropped from
    ``y_train``, Box-Cox is fitted again on the remainder, and the result is
    shifted to zero mean and scaled to unit standard deviation.

    :return: ``(whitened series, lambda of the second Box-Cox fit)``
    """
    first_pass, _ = boxcox(y_train)
    outliers = tukey_outlier_test(Series(first_pass, y_train.index))

    kept = y_train.drop(outliers)
    transformed, bx_lambda = boxcox(kept)
    transformed = Series(transformed, kept.index)

    whitened = (transformed - transformed.mean()) / transformed.std()
    return whitened, bx_lambda

コード例 #16

0

ファイルを表示

def roys_safety_first_criterion(portfolio_returns: pd.Series, minimum_threshold=0.02, period=252):
    """
    Compute Roy's safety-first criterion for a series of returns.

    :param portfolio_returns: Pandas series or dataframe of percentage changes of the security (or portfolio) over time.
    :param minimum_threshold: minimum acceptable return, below which the returns are less desirable.
    :param period: number of observations per reporting period, e.g. 252 for yearly or 21 for monthly statistics.
    :return: (mean * period - minimum_threshold) / (std * sqrt(period))
    """
    scaled_mean = portfolio_returns.mean() * period
    scaled_std = portfolio_returns.std() * math.sqrt(period)
    return (scaled_mean - minimum_threshold) / scaled_std

コード例 #17

0

ファイルを表示

ファイル: core.py プロジェクト: keitakurita/torchtable

 def fit(self, x: pd.Series):
     """Record the statistics of ``x`` that ``self.method`` needs.

     "Gaussian" stores ``self.mean`` and ``self.std``; "MinMax" stores
     ``self.min`` and ``self.max``. "RankGaussian" (like any other value)
     stores nothing. Returns ``self``.
     """
     method = self.method
     if method == "MinMax":
         self.min, self.max = x.min(), x.max()
     elif method == "Gaussian":
         self.mean, self.std = x.mean(), x.std()
     # TODO: store state for "RankGaussian"
     return self

コード例 #18

0

ファイルを表示

def stdev(items):
    """Calculate the standard deviation of the items via ``pandas.Series.std``.

    :param items: Stdev is calculated from these items.
    :type items: list
    :returns: Stdev.
    :rtype: float
    """
    as_series = Series(items)
    return as_series.std()

コード例 #19

0

ファイルを表示

def filter_outliers(data: pd.Series, std: int=3) -> pd.Series:
    """
    Keep only the values lying within ``std`` standard deviations of the mean.

    :param data: data to be filtered
    :param std: number of standard deviations to be filtered. Default is 3
    :return: ``Series`` filtered out of outliers
    """
    distance = (data - data.mean()).abs()
    limit = std * data.std()
    return data[distance <= limit]

コード例 #20

0

ファイルを表示

 def z_score(data: pd.Series):
     """
     Standardise ``data``: subtract its mean and divide by its standard deviation.

     :param data: values to standardise
     :return: the standardised values
     """
     centered = data - data.mean()
     return centered / data.std()

コード例 #21

0

ファイルを表示

ファイル: test_describe.py プロジェクト: Aathi410/Pro123

 def test_describe_ints(self):
     ser = Series([0, 1, 2, 3, 4], name="int_data")
     result = ser.describe()
     expected = Series(
         [5, 2, ser.std(), 0, 1, 2, 3, 4],
         name="int_data",
         index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
     )
     tm.assert_series_equal(result, expected)

コード例 #22

0

ファイルを表示

ファイル: statistics.py プロジェクト: samwu101/GS-Bot

def winsorize(x: pd.Series, limit: float = 2.5, w: int = 0) -> pd.Series:
    """
    Limit extreme values in series

    :param x: time series of prices
    :param limit: max z-score of values
    :param w: window: number of observations to use (defaults to length of series).
        NOTE(review): in this function ``w`` is only checked for being
        non-zero; the statistics below always use the whole series.
    :return: timeseries of winsorized values

    **Usage**

    Cap and floor values in the series which have a z-score greater or less
    than the provided value. The sample mean and standard deviation of the
    series define the limits:

    :math:`upper = \\mu + \\sigma \\times limit`

    :math:`lower = \\mu - \\sigma \\times limit`

    Where :math:`\\mu` and :math:`\\sigma` are sample mean and standard
    deviation. The series is restricted by:

    :math:`R_t = max( min( X_t, upper), lower )`

    See `winsorising <https://en.wikipedia.org/wiki/Winsorizing>`_ for additional information

    **Examples**

    Generate price series and winsorize z-score of returns over :math:`22` observations

    >>> prices = generate_series(100)
    >>> winsorize(zscore(returns(prices), 22))

    **See also**

    :func:`zscore` :func:`mean` :func:`std`

    """
    # An empty series is returned untouched.
    if x.size < 1:
        return x

    w = w or x.size
    assert w, "window is not 0"

    center = x.mean()
    spread = x.std()
    band = spread * limit

    # ``ceil`` / ``floor`` are helpers from the surrounding module; presumably
    # they cap at the upper bound and floor at the lower bound -- confirm.
    capped = ceil(x, center + band)
    return floor(capped, center - band)

コード例 #23

0

ファイルを表示

def CZI(x: pd.Series):
    """ Pearson Type III distribution

    Standardises ``x`` and applies ``6/cs * (cs/2 * z + 1)**(1/3) - 6/cs + cs/6``
    element-wise, where ``cs`` is the cubed standardised value divided by
    ``len(x)``.

    NOTE(review): ``cs`` is computed per element here, not as one summed
    skewness coefficient -- confirm this is intended.
    """
    zsi = (x - x.mean()) / x.std()
    cs = zsi ** 3 / len(x)
    cube_root = (cs / 2.0 * zsi + 1.0) ** (1.0 / 3.0)
    return 6.0 / cs * cube_root - 6.0 / cs + cs / 6.0

コード例 #24

0

ファイルを表示

def calc_value_at_risk(returns: pd.Series, alpha=0.95, hist=True) -> float:
    """
    Value at risk in %

    With ``hist`` set, the empirical ``1 - alpha`` quantile of ``returns`` is
    used; otherwise the same quantile of a normal distribution with the mean
    and standard deviation of ``returns``.
    """
    tail = 1 - alpha
    if not hist:
        return norm.ppf(tail, returns.mean(), returns.std())
    return returns.quantile(q=tail)

コード例 #25

0

ファイルを表示

ファイル: comparator.py プロジェクト: stenskjaer/oh-my-bills

 def _bigger_than_cutoff(scores: pd.Series) -> pd.Series:
     """Identifies all the values that have a similarity scores bigger than the
     first standard deviation from the mean. If they are all identical we return
     everything.
     """
     if scores.mean() == 1.0:
         return scores
     dev = scores.std()
     most_similar = scores[scores > dev]
     return most_similar

コード例 #26

0

ファイルを表示

ファイル: feature_extraction.py プロジェクト: hyh2010/GEE

    def std(grouped_data: pd.Series) -> float:
        """
        Compute the standard deviation of a pandas Series.

        :param grouped_data: grouped data
        :type grouped_data: pd.Series
        :return: value of ``grouped_data.std()``
        :rtype: float
        """
        deviation = grouped_data.std()
        return deviation

コード例 #27

0

ファイルを表示

def annualized_vol(r: pd.Series, periods_per_year):
    """
    Annualize the volatility of a set of returns.

    Inputs: r: pd.Series or pd.DataFrame of returns
            periods_per_year: frequency of the returns within a year, e.g.
                365 or 252 for daily data and 12 for monthly data
    Output: standard deviation of ``r`` times the square root of
            ``periods_per_year``
    """
    scaling = periods_per_year ** 0.5
    return r.std() * scaling

コード例 #28

0

ファイルを表示

ファイル: Util.py プロジェクト: sethmjackson/House-Prices-Advanced-Regression-Techniques-Kaggle-

def removeOutliers(S: pd.Series, inplace=False, printOutput=False):
    """Remove values lying more than three standard deviations from the mean.

    :param S: series to filter
    :param inplace: when true, the outlier rows are dropped from ``S`` itself
        and ``None`` is returned; rows are dropped by index label, so the
        index is assumed to be unique
    :param printOutput: when true, print the lengths before and after
        filtering (as reported by ``getRowsNum``)
    :return: the filtered series, or ``None`` when ``inplace`` is true
    """
    is_outlier = (S - S.mean()).abs() > 3 * S.std()
    result = S[~is_outlier]

    if printOutput:
        # Report the original length first; S has not been modified yet.
        print('S Length: ', getRowsNum(S))
        print('results Length: ', getRowsNum(result))
    if inplace:
        # Rebinding the local name would never reach the caller, so the
        # outlier rows are dropped from the caller's object instead.
        S.drop(S.index[is_outlier.to_numpy()], inplace=True)
        return None
    return result

コード例 #29

0

ファイルを表示

ファイル: normalizer.py プロジェクト: Fumipo-Theta/fumipo_stat

def normalizer(series: pd.Series):
    """Build a pair of functions based on the mean and standard deviation of ``series``.

    :return: ``(scale, rescale)`` where ``scale(v)`` gives
        ``(v - mean) / std`` and ``rescale(v)`` gives ``v * std + mean``
    """
    mean = series.mean()
    std = series.std()

    def scale(v):
        centered = v - mean
        return centered / std

    def rescale(v):
        stretched = v * std
        return stretched + mean

    return scale, rescale

コード例 #30

0

ファイルを表示

def fuzzy_smf(series: pd.Series):
    """Apply ``smf`` to ``series`` with parameters derived from its statistics.

    The mean, standard deviation and maximum of the series are passed to
    ``calc_a`` and ``calc_c``; their results become the ``a`` and ``c``
    arguments of ``smf``.
    """
    moments = (series.mean(), series.std(), series.max())
    a = calc_a(*moments)
    c = calc_c(*moments)
    return smf(series, a, c)

コード例 #31

0

ファイルを表示

ファイル: etl.py プロジェクト: benjaminkaplanphd/traveling-salesperson

def whole_number_digits(series: pd.Series) -> int:
    """Determine the relevant number of whole number digits for the given series.

    The standard deviation with ``ddof=0`` is used as the measure of
    variation; the result is the truncated base-10 logarithm of it plus one.

    Args:
        series: A Pandas Series (of floats)
    Returns:
        The number of whole number digits associated with this series
    Raises:
        ValueError: if the standard deviation is zero (``math.log10(0)``)
    """
    spread = series.std(ddof=0)
    magnitude = int(math.log10(spread))
    return magnitude + 1

コード例 #32

0

ファイルを表示

ファイル: make_data.py プロジェクト: morrislab/plos-medicine-joint-patterns

def z_transform(x: pd.Series) -> pd.Series:
    """
    Z-transforms scores: subtracts the mean and divides by the standard deviation.

    Args:
        x: values to Z-transform

    Returns:
        Z-transformed scores
    """
    centered = x - x.mean()
    return centered / x.std()

コード例 #33

0

ファイルを表示

 def sharpeRatio(self, returns: pd.Series):
     """
     Sets the Sharpe Ratio for the Asset

     The stored value is the mean of the returns divided by their standard
     deviation, multiplied by the square root of the number of returns.

     :param returns: Series that contains the returns of the Asset
     :raises TypeError: if ``returns`` is not a Pandas Series
     """
     if not isinstance(returns, pd.Series):
         # Name this method in the message; ``classmethod.__name__`` is just
         # the name of the builtin and says nothing about the caller.
         raise TypeError('sharpeRatio: returns must be a Pandas Series. '
                         'Current type of the returns: ' +
                         str(type(returns)))
     self._sharpeRatio = (returns.mean() / returns.std()) * (len(returns)**
                                                             0.5)

コード例 #34

0

ファイルを表示

ファイル: processing.py プロジェクト: igncp/encina

  def get_summary_indicators_from_hist(sf, hist, int_index=False):
    """Summarise a histogram given as a mapping from label to frequency.

    :param sf: unused here (the owning instance)
    :param hist: mapping of label -> frequency; with ``int_index`` the labels
        must be strings convertible to ``int``
    :param int_index: also compute statistics over the labels read as
        integers, plus the frequency-weighted sum of the labels
    :return: dict with ``means``, ``medians``, ``stds``, ``max`` and
        ``index_total`` (``'NA'`` unless ``int_index`` is set)
    """
    freq = Series(hist)
    maxs = {
      'freq': {'freq': freq.max(), 'index': freq.idxmax()}
    }
    means = {'freq': freq.mean()}
    medians = {'freq': freq.median()}
    stds = {'freq': freq.std()}
    index_total = 'NA'

    if int_index:
      index_list = freq.index.astype(int).tolist()
      # Pair frequencies with labels by position. Looking entries up with
      # ``freq[i]`` would depend on the integer-position fallback of a
      # label-indexed Series, which newer pandas versions no longer provide.
      index_total = sum([value * label for value, label in zip(freq.values, index_list)])
      index_series = Series(index_list)

      means['index'] = index_series.mean()
      medians['index'] = index_series.median()
      stds['index'] = index_series.std()

      maxs['freq']['index'] = int(maxs['freq']['index'])

      largest_label = max(index_list)
      maxs['index'] = {
        'index': largest_label,
        'freq': hist[str(largest_label)]
      }

    return {
      'means': means,
      'medians': medians,
      'stds': stds,
      'max': maxs,
      'index_total': index_total
    }

コード例 #35

0

ファイルを表示

ファイル: algo1.py プロジェクト: sankethkatta/info296-ng

# Ratio of each row's predecessor (column 1) to the row itself (column 0).
# ``new_list`` is defined earlier in the file, outside this excerpt.
k_list = list()
size = len(new_list)
k_sum = 0
for i in range(size):
	# The first row has no predecessor, so it contributes no ratio.
	if i == 0:
		#k_sum = float(new_list[0][1])
		continue
	k_sum = k_sum + (float(new_list[i-1][1])/float(new_list[i][0]))
	k_list.append(float(new_list[i-1][1])/float(new_list[i][0]))
# NOTE(review): divides by ``size`` although only ``size - 1`` ratios were
# summed; this value is overwritten further down before it is used.
k_avg = k_sum/size
#consu_ser = Series(data=k_list,index=range(len(k_list)))
consu_ser = Series(data=k_list,index=range(len(k_list)))
#print consu_ser.describe()
mean = consu_ser.mean()
std_dev = consu_ser.std()
# Keep only the ratios strictly within one standard deviation of the mean.
modified_list = list()
for i in range(len(k_list)):
	if (k_list[i] < mean + std_dev) and (k_list[i] > mean - std_dev):
		modified_list.append(k_list[i])

plt.hist(modified_list)
plt.show()
# Average of the retained ratios; replaces the earlier ``k_avg``.
consu_ser_mod = Series(data=modified_list,index=range(len(modified_list)))
k_avg = consu_ser_mod.sum()/len(modified_list)

# Accumulate, over consecutive rows, the previous row's column 1 minus the
# current row's column 0 scaled by the average ratio.
thresh_sum = 0
for i in range(size):
	if i == 0:
		continue
	thresh_sum = thresh_sum + float(new_list[i-1][1]) - float(new_list[i][0])*k_avg

コード例 #36

0

ファイルを表示

ファイル: draft.py プロジェクト: systemtrader/CTPTrader

    account.password
)
traderChannel


#%% Fixed-length queue
from collections import deque
# With maxlen=10 the deque keeps at most the ten most recent items.
q = deque(maxlen=10)
for i in range(10):
    q.append(i)

#%% Mean and standard deviation of a small Series
from pandas import Series
s = Series(range(10))
s.mean()
s.std()


#%% Export trade results to Excel
import os
os.chdir('/home/duhan/github/CTPTrader')
from comhelper import setDjangoEnvironment
setDjangoEnvironment()
from database.models import *
from django_pandas.io import read_frame
from pandas.io.excel import ExcelWriter
# Load every position whose state is 'close' into a DataFrame and write it out.
df = read_frame(ModelPosition.objects.filter(state='close'))
writer = ExcelWriter('/tmp/output.xls')
df.to_excel(writer)
writer.save()

コード例 #37

0

ファイルを表示

ファイル: pairs_trading_algorithm.py プロジェクト: eomsky49/Betrades

        def adfTest(spread):
            """Return element 1 (the p-value) of ``ts.adfuller(spread, 1)``."""
            adf_result = ts.adfuller(spread, 1)
            return adf_result[1]

        ADF_p_value = adfTest(spread)


        if ADF_p_value <= 0.05:
            print    "The spread is likely mean-reverting."
        else:
            print    "The spread is not mean-reverting."

        spread = Series(spread)
        signalMean = spread.mean()
        signalDev = spread.std()

        openMult = 1.0
        closeMult = 0.5
        stopLossMult = 4.0

        openSignal = signalDev * openMult;
        closeSignal = signalDev * closeMult;
        stopLossSignal = signalDev * stopLossMult;

        residSpread = spread - signalMean
        residSpread.plot()

        openSignalUp = openSignal * (residSpread * 0 + 1)
        openSignalDown = -openSignal * (residSpread * 0 + 1)
        openSignalUp.plot()

コード例 #38

0

ファイルを表示

ファイル: inlab9.py プロジェクト: klowande/PythonCourse2016

# NOTE: this excerpt is Python 2 code (``print`` statements); ``data`` and
# ``data2`` are created earlier in the file, outside this excerpt.
data2.tail()

# summary statistics
# d1990.sum()
data.describe()
data.std()

# how much did total population change between 1990 and 2010? 
# NOTE(review): ``total`` is overwritten on each pass, so only the 2010 sum
# remains after the loop.
for i in [1990,2010]:
	total = sum(data['pop'][data['yr']==i]) # +42,099,904

# TODO: how many people did the average congressperson represent in 1990?

# we could also represent a single variable as a series with hierarchical indexing
p = Series(data['pop'].values, index=[data['st'], data['yr']])
p['North Carolina']
# mean per value of the 'st' index level
p.mean(level='st')

# standard deviation over all entries of the series
p.std()

# TODO: calculate standard deviation by year

p.swaplevel('st', 'yr')

# correlation
data['pop'].corr(data['ev'])

# estimate a linear model
model = pd.ols(y=data['ev'], x=data['popm'])
print model 
model.beta