Beispiel #1
0
def generalized_kolmogorov_smirnov(instance: Series,
                                   tag_list: Series,
                                   p: float = 1):
    """Random walk / running-sum statistic of GSEA (generalised KS).

    Elements of ``instance`` are ranked in descending order of value (ranks
    are offset by one).  Walking over ``instance`` in its existing index
    order, the running sum grows at every label present in ``tag_list`` (a
    hit, weighted by ``rank ** p`` and normalised so all hits add up to 1)
    and shrinks by a constant ``1 / (n - t)`` at every other label (a miss).

    With ``p=0`` this is kind-of equivalent to the normal KS statistic (see
    the proof in the GSEA 2005 paper supplement).

    Args:
        instance: scores to rank, indexed by element label.
        tag_list: series whose index holds the labels of the tagged
            elements; only its index and length are used.
        p: exponent applied to the rank of each hit.

    Returns:
        The negated running-sum difference (hits minus misses) that has the
        largest absolute value.
    """
    ranked_list = instance.rank(ascending=False) + 1
    tags = tag_list.index

    n = len(ranked_list)
    t = len(tag_list)

    # When every element is tagged there are no misses to penalise, so the
    # decrement is never used; avoid the division by zero in that case.
    decrement = 1 / (n - t) if n != t else 0.0

    hit_weights = ranked_list[tags].pow(p)

    # Start from a float series: the normalised weights are fractional and
    # writing them into an integer series relies on a deprecated upcast.
    running_sum_statistic_hits = Series(data=0.0, index=instance.index)
    running_sum_statistic_hits[tags] = hit_weights / hit_weights.sum()
    running_sum_statistic_hits = running_sum_statistic_hits.cumsum()

    running_sum_statistic_misses = Series(data=float(decrement),
                                          index=instance.index)
    running_sum_statistic_misses[tags] = 0.0
    running_sum_statistic_misses = running_sum_statistic_misses.cumsum()

    return -max(running_sum_statistic_hits - running_sum_statistic_misses,
                key=abs)
Beispiel #2
0
def get_abs_max_drawdown(profit_results: Series) -> float:
    """Return the magnitude of the deepest drawdown of a profit series.

    The drawdown at each step is the cumulative profit minus the highest
    cumulative profit seen so far; the absolute value of the most negative
    drawdown is returned.
    """
    equity_curve = profit_results.cumsum()
    running_peak = equity_curve.cummax()
    drawdown = equity_curve - running_peak
    return abs(drawdown.min())
Beispiel #3
0
 def location(self):
     """Return segment lengths and their running totals.

     Takes the values of the module-level ``val_segment_dict`` in the
     positional range ``[self.start, self.end)`` and measures each with
     ``len``.

     Returns:
         A tuple ``(lengths, cumulative_lengths)`` of numpy arrays.
     """
     # A dict view cannot be sliced on Python 3, so materialise it first.
     segments = list(val_segment_dict.values())[self.start:self.end]
     # Build the series in one step: Series.append was removed in pandas 2.0
     # and copied all accumulated data on every iteration.
     ser = Series([len(x) for x in segments], dtype="int64")
     return ser.values, ser.cumsum().values
Beispiel #4
0
def start_date(
        confirmed_cases: pd.Series,
        gt_distribution: np.ndarray,
        r_window_size: Optional[int] = None,
) -> date:
    """
    Find the first date from which R can be estimated reliably.

    Following [Cori et al., 2013], the returned date is the latest of:
        1. the date on which cumulative cases reach at least 12,
        2. one mean generation time (rounded up to whole days) after the
           index case,
        3. ``r_window_size`` days after the index case (zero days when
           ``r_window_size`` is not an ``int``).

    The index case is the first entry of ``confirmed_cases`` that is non-zero.
    """
    window_days = r_window_size if isinstance(r_window_size, int) else 0

    dozen_reached_on = confirmed_cases.cumsum().ge(12).idxmax()
    first_case_on = confirmed_cases.ne(0).idxmax()

    # Expected generation time: lag positions weighted by the distribution.
    lags = np.arange(len(gt_distribution))
    mean_generation_time = gt_distribution @ lags / gt_distribution.sum()

    candidates = (
        dozen_reached_on,
        first_case_on + timedelta(days=np.ceil(mean_generation_time)),
        first_case_on + timedelta(days=window_days),
    )
    return max(candidates)
Beispiel #5
0
def get_exp_ratio_idx(data: pd.Series, exp_ratio):
    """Locate where the cumulative share of ``data`` first exceeds each ratio.

    Returns:
        A tuple ``(cumulative_share, labels)``: the running total of ``data``
        divided by its sum, and, for every ratio in ``exp_ratio``, the first
        index label whose cumulative share is strictly greater than it.
    """
    cumulative_share = data.cumsum() / data.sum()
    first_labels = [
        cumulative_share[cumulative_share > ratio].index[0]
        for ratio in exp_ratio
    ]
    return cumulative_share, first_labels
def _add_gross_and_costs(gross: pd.Series, costs: pd.Series):
    """Add costs to gross values after aligning costs to the gross index.

    Costs are accumulated, forward-filled onto ``gross.index`` and then
    differenced again, giving a per-period cost on the gross timeline; the
    first aligned period is therefore NaN.
    """
    aligned_cost_steps = (
        costs.cumsum()
        .reindex(gross.index, method="ffill")
        .diff()
    )
    return gross + aligned_cost_steps
Beispiel #7
0
def get_precision_recall(
        data: pd.Series,
        ties: Optional[np.ndarray] = None) -> Tuple[pd.Series, pd.Series]:
    """Return running precision and recall over ``data``.

    Precision at position ``k`` is the cumulative sum of ``data`` divided by
    ``k`` (1-based); recall is the cumulative sum divided by the total sum.
    When ``ties`` is given, both series are passed through ``fix_tied``.
    """
    hits_so_far = data.cumsum()
    positions = np.arange(1, data.shape[0] + 1)

    precision = hits_so_far / positions
    recall = hits_so_far / data.sum()

    if ties is None:
        return precision, recall

    return fix_tied(ties, precision), fix_tied(ties, recall)
Beispiel #8
0
def cumulative_moving_avg_v3(values: pd.Series, window: int = 5) -> pd.Series:
    '''
    Cumulative moving average.

    Each element is the mean of all values up to and including it; the
    first ``window - 1`` results are blanked out with NaN.
    '''
    # One-based position of every element, on the same index as the input.
    positions = pd.Series(np.arange(1, len(values) + 1), index=values.index)
    running_mean = values.cumsum() / positions

    warm_up = window - 1
    running_mean.iloc[:warm_up] = np.nan
    return running_mean
Beispiel #9
0
def get_ica_components(X, contribution=0.85):
    """Fit FastICA on ``X`` and keep the highest-energy components.

    Components are ordered by the sum of squares of their mixing-matrix
    column.  They are kept up to and including the first one at which the
    cumulative share of that quantity reaches ``contribution``.

    Returns:
        A tuple ``(sources, mixing, mean, n_kept)``: the transformed data and
        the mixing matrix restricted to the kept components (as arrays), the
        fitted ``mean_`` and the number of kept components.
    """
    ica = FastICA(n_components=len(X.columns)).fit(X)

    # Sum of squares per mixing-matrix column, largest first.
    energy = Series(np.sum(ica.mixing_**2, axis=0))
    energy = energy.sort_values(ascending=False)

    sources = DataFrame(ica.transform(X))
    mixing = DataFrame(ica.mixing_)

    # Everything after the first component that reaches the target is dropped.
    target_reached = energy.cumsum() / energy.sum() >= contribution
    energy = energy.drop(energy.index[target_reached][1:])

    kept = energy.index
    return (sources.reindex(columns=kept).values,
            mixing.reindex(columns=kept).values,
            ica.mean_,
            len(energy))
Beispiel #10
0
def vwap(prices: pd.Series, volume: pd.Series) -> pd.Series:
    """
    Running volume-weighted average price.

    Equivalent to
    ``np.cumsum(df.quantity * df.price) / np.cumsum(df.quantity)``.
    When ``prices`` carries a MultiIndex, the running sums are computed
    separately for each value of index level 1.

    :param prices: traded prices
    :param volume: traded volume, aligned with ``prices``
    :return: cumulative traded value divided by cumulative volume
    """
    traded_value = volume * prices

    if not isinstance(prices.index, pd.MultiIndex):
        return traded_value.cumsum() / volume.cumsum()

    value_per_group = traded_value.groupby(level=1).cumsum()
    volume_per_group = volume.groupby(level=1).cumsum()
    return value_per_group / volume_per_group
Beispiel #11
0
def get_nome_representacao_do_grupo_v2(df2):
    """Build a representative list of words for a group of item descriptions.

    If every entry of ``df2['DS_ITEM_CLEAN']`` is the same, that description
    is split into words and returned.  Otherwise words are counted over all
    descriptions and ordered by descending count; each word gets the relative
    growth it adds to the running count total (the first word gets 1).  The
    words are then ordered by their mean position inside the descriptions
    that contain them, and only words whose growth is at least 0.15 are kept.
    """
    descriptions = list(df2['DS_ITEM_CLEAN'])
    distinct = set(descriptions)
    if len(distinct) == 1:  # i.e. all items are identical
        return next(iter(distinct)).split()

    tokenized = [text.split() for text in df2['DS_ITEM_CLEAN']]
    frequencies = collections.Counter(
        token for tokens in tokenized for token in tokens)

    table = DataFrame(Series(frequencies).sort_values(ascending=False))
    table.columns = ['word']
    table['cumsum'] = table.cumsum().pct_change().replace(np.nan, 1)

    # Mean position of each word within the descriptions that contain it.
    mean_position = {
        word: np.mean(np.array(
            [tokens.index(word) for tokens in tokenized if word in tokens]))
        for word in table.index
    }
    table['position'] = Series(mean_position)
    table = table.sort_values('position')

    # Ignore words that add little to the running total (useless words).
    table = table[table['cumsum'] >= 0.15]

    return list(table.index)
Beispiel #12
0
def minmax_rank_based(values, sensitivity):
    """Give the range of typical values, for use when checking outliers.

    Distinct values are ordered from most to least frequent and their counts
    are accumulated.  Values are kept while the running count stays within
    ``sensitivity`` times the total number of observations; the smallest and
    largest kept value are returned.

    minmax_rank_based( [1, 2, 6, 4, ...], SENSITIVITY )
    (1, 17)

    Args:
        values: observations to analyse.
        sensitivity: fraction (0..1) of the observations the typical values
            may cover.

    Returns:
        ``(lowest, highest)`` of the typical values.

    Raises:
        ValueError: if no value qualifies, e.g. when the most frequent value
            alone already exceeds the allowed share.
    """
    # Use the argument itself; the previous version read an undefined global
    # named ``delays`` and ignored ``values``.
    value_counts = Series(values).value_counts()
    # Running number of observations, most frequent value first.
    cumsum = value_counts.cumsum()
    typical_values = cumsum[cumsum <= sensitivity * value_counts.sum()]
    return min(typical_values.index), max(typical_values.index)
Beispiel #13
0
def lorenz_curve(data: pd.Series) -> pd.Series:
    """
    Compute the Lorenz curve values of the data.

    Each value is the running total of ``data`` up to that element divided
    by the overall total.  See the `lorenz curve
    <https://en.wikipedia.org/wiki/Lorenz_curve>`_ article for background.

    Args:
        data: sorted series to calculate the lorenz curve for

    Returns:
        the values of the lorenz curve as a series
    """
    total = data.sum()
    running_share = data.cumsum() / total
    return tp.cast(pd.Series, running_share)
Beispiel #14
0
def moving_avg_v3(values: pd.Series, window: int = 20) -> pd.Series:
    '''
    Simple moving average computed from a running total.

    A leading zero is prepended to the cumulative sum so that subtracting
    the total shifted by ``window`` yields the sum of exactly ``window``
    consecutive values; without it the shifted difference starts one
    position later than wanted.  The padding element is dropped again and
    the original index restored before returning.
    '''
    running_total = pd.concat(
        [pd.Series(0, name=values.name), values.cumsum()])
    window_sums = running_total - running_total.shift(window)

    averages = (window_sums / window).iloc[1:]
    averages.index = values.index.copy()
    return averages
Beispiel #15
0
def cumseries_continuous(returns: pd.Series) -> pd.Series:
    """Performs continuous compounding to create cumulative index series.

    e.g. if there are 3 returns r1, r2, r3,
    calculate
        exp(r1) - 1
        exp(r1 + r2) - 1
        exp(r1 + r2 + r3) - 1

    Args:
        returns: pandas series of returns, in decimals.
            i.e. 3% should be expressed as 0.03, not 3.

    Returns:
        pandas series of cumulative index, in decimals (the annotation used
        to say ``float``, but a series has always been returned).
    """

    return returns.cumsum().apply(lambda x: math.exp(x) - 1)
Beispiel #16
0
def cumseries_arithmetic(returns: pd.Series) -> pd.Series:
    """Performs arithmetic compounding to create cumulative index series.

    For returns r1, r2, r3 the result holds
        r1
        r1 + r2
        r1 + r2 + r3

    Args:
        returns: pandas series of returns, in decimals.
            i.e. 3% should be expressed as 0.03, not 3.

    Returns:
        pandas series of cumulative index, in decimals.
    """
    running_total = returns.cumsum()
    return running_total
Beispiel #17
0
def pandas_series_demo03():
    """Print a few element-wise Series operations and an index-aligned sum."""
    # operations on a single series
    numbers = Series(range(4), index=['a', 'b', 'c', 'd'])
    demos = (
        ('series, items > 1:\n', numbers[numbers > 1]),
        ('series, items * 2:\n', numbers * 2),
        ('square series:\n', np.square(numbers)),
        ('series, sum by rows:\n', numbers.cumsum()),
    )
    for caption, outcome in demos:
        print(caption, outcome)

    # adding two series sums the elements that share an index label
    population = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
    from_dict = Series(population)
    from_state_list = Series(
        population, index=['California', 'Ohio', 'Oregon', 'Texas'])
    combined = from_dict + from_state_list
    print('\nseries1 + series2:\n', combined)

    combined.index.name = 'state'
    combined.name = 'population'
    print('\nseries with name:\n', combined)
def predictions_better(ts_data, window_size, should_plot=True):
    """Rebuild level predictions from a fitted model and optionally plot them.

    The fitted values obtained from ``model_combined_no_log`` are
    accumulated and added to the first observation of ``ts_data`` to form a
    prediction on the original scale.

    Args:
        ts_data: the original time series.
        window_size: forwarded to ``model_combined_no_log``.
        should_plot: when true, draw original and predicted series in
            figure 2 without blocking.
    """
    results_ARIMA = model_combined_no_log(ts_data, window_size, True)

    # Make a series with cumulative fitted values
    predictions_ARIMA_diff = Series(results_ARIMA.fittedvalues, copy=True)
    predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()

    # Start every date at the first observation, then add the accumulated
    # fitted values.  ``.ix`` was removed in pandas 1.0; ``.iloc[0]`` selects
    # the first observation by position.
    # NOTE(review): on an integer-labelled index ``.ix[0]`` was label-based;
    # confirm ts_data is date-indexed as its name suggests.
    predictions_ARIMA = Series(ts_data.iloc[0], index=ts_data.index)
    predictions_ARIMA = predictions_ARIMA.add(predictions_ARIMA_diff_cumsum,
                                              fill_value=0)

    if should_plot:
        pyplot.figure(2)
        pyplot.plot(ts_data, color="blue", label="Original")
        pyplot.plot(predictions_ARIMA, color="green", label="Prediction")
        pyplot.legend(loc="best")
        pyplot.title("Predictions")
        pyplot.show(block=False)
Beispiel #19
0
def get_nome_representacao_do_grupo(df2):
    """Build a representative list of words for a group of item descriptions.

    If every entry of ``df2['DS_ITEM_CLEAN']`` is the same, that description
    is split into words and returned.  Otherwise words are counted over all
    descriptions, ordered by descending count, and kept only when they grow
    the running count total by at least 15% (the first word is always kept).
    The result lists the non-numeric words that are not in ``unidades``
    (with the first two swapped), then the numeric words, then the words
    found in ``unidades``.
    """
    descriptions = list(df2['DS_ITEM_CLEAN'])
    distinct = set(descriptions)
    if len(distinct) == 1:  # i.e. all items are identical
        return next(iter(distinct)).split()

    tokens = [
        token for text in df2['DS_ITEM_CLEAN'] for token in text.split()
    ]
    frequencies = Series(collections.Counter(tokens))
    frequencies = frequencies.sort_values(ascending=False)

    growth = frequencies.cumsum().pct_change().replace(np.nan, 1)
    # Ignore words that add little to the running total (useless words).
    kept_words = list(growth[growth >= 0.15].index)

    names = [
        word for word in kept_words
        if word not in unidades and not word.isdigit()
    ]
    if len(names) > 1:  # with more than one word, swap the first two
        names[0], names[1] = names[1], names[0]
    numbers = [word for word in kept_words if word.isdigit()]
    units = [word for word in kept_words if word in unidades]

    return names + numbers + units
Beispiel #20
0
def residual_days(
    consumption_series: pd.Series,
    residual_weight: float,
    threshold: float = 0.0,
    forecast_size: int = None,
) -> int:
    """
    Number of days until the residual product has been consumed.

    Consumption is accumulated from the first timestamp onward (the first
    value itself is excluded) and compared against ``residual_weight`` minus
    ``threshold``.

    Args:
        consumption_series: Consumption values as a pandas Series.
        residual_weight: Residual weight remaining.
        threshold: Threshold of 'empty' stock. Typically 0 or the average daily consumption (oz.).
        forecast_size: Size of forecast.

    Returns:
        * the elapsed days between the first timestamp and the first
          timestamp at which cumulative consumption reaches the usable
          weight, when that happens within the series;
        * ``forecast_size + 1`` when it does not and a (truthy)
          ``forecast_size`` was given;
        * ``-1`` otherwise.
    """
    usable_weight = residual_weight - threshold
    first_timestamp = consumption_series.index[0]

    # Drop the consumption that was incurred before timestamp 0.
    consumed_since_start = (
        consumption_series.cumsum() - consumption_series.values[0])
    depleted = consumed_since_start.ge(usable_weight)

    if depleted.values.any():
        depletion_timestamp = consumed_since_start[depleted].index[0]
        elapsed = pd.Timedelta(depletion_timestamp - first_timestamp)
        return elapsed / pd.Timedelta("1D")

    if forecast_size:
        return forecast_size + 1

    return -1
Beispiel #21
0
def get_cdf(raw_cnt: pd.Series) -> pd.DataFrame:
    """Compute the CDF and related metrics of a series of counts.

    Typically used in conjunction with `stats_df`.

    >>> stats_df = stats_by(df, by='item', agg_funcs={'date': 'nunique'}, rename={'date': 'day_cnt'})
    >>> stats_df
          day_cnt
    item
    ab          1
    cd          1
    ef          2

    >>> raw_cnt_df = stats_df['day_cnt'].reset_index().groupby(by='day_cnt').count()
    >>> raw_cnt_df
             item
    day_cnt
    1           2
    2           1

    >>> cdf_df = get_cdf(raw_cnt_df.iloc[:, 0])
    >>> cdf_df
             count       cdf  cum_sum  rhs
    day_cnt
    1            2  0.666667        2    1
    2            1  1.000000        3    0

    The returned frame has the columns ``count`` (the input), ``cum_sum``
    (running total), ``cdf`` (running total over grand total) and ``rhs``
    (grand total minus running total).
    """
    running_total = raw_cnt.cumsum()
    grand_total = running_total.iloc[-1]

    columns = {
        "count": raw_cnt,
        "cdf": running_total / grand_total,
        "cum_sum": running_total,
        # What remains to the right of each position.
        "rhs": grand_total - running_total,
    }
    return pd.DataFrame(columns)
class MySeries:
    """Thin wrapper around a pandas ``Series``.

    The wrapped series is stored in ``x``; ``values`` and ``index`` mirror
    its attributes.  Every method forwards its arguments to the pandas
    operation of the same name and wraps the result in a new ``MySeries``.

    The ``rolling_*`` methods used to call the module-level
    ``pd.rolling_*`` functions, which no longer exist in pandas; they now go
    through ``Series.rolling(...)``, so their arguments are those of
    ``Series.rolling`` (``window``, ``min_periods``, ``center``, ...).
    """

    def __init__(self, *args, **kwargs):
        self.x = Series(*args, **kwargs)
        self.values = self.x.values
        self.index = self.x.index

    def _rolling(self, stat, *args, **kwargs):
        """Apply the rolling-window aggregation named ``stat``."""
        windowed = self.x.rolling(*args, **kwargs)
        return MySeries(getattr(windowed, stat)())

    def rolling_mean(self, *args, **kwargs):
        return self._rolling("mean", *args, **kwargs)

    def rolling_count(self, *args, **kwargs):
        # Report counts for partially filled windows too, unless the caller
        # chose ``min_periods`` (positionally or by keyword).
        if len(args) < 2:
            kwargs.setdefault("min_periods", 0)
        return self._rolling("count", *args, **kwargs)

    def rolling_sum(self, *args, **kwargs):
        return self._rolling("sum", *args, **kwargs)

    def rolling_median(self, *args, **kwargs):
        return self._rolling("median", *args, **kwargs)

    def rolling_min(self, *args, **kwargs):
        return self._rolling("min", *args, **kwargs)

    def rolling_max(self, *args, **kwargs):
        return self._rolling("max", *args, **kwargs)

    def rolling_std(self, *args, **kwargs):
        return self._rolling("std", *args, **kwargs)

    def rolling_var(self, *args, **kwargs):
        return self._rolling("var", *args, **kwargs)

    def rolling_skew(self, *args, **kwargs):
        return self._rolling("skew", *args, **kwargs)

    def rolling_kurtosis(self, *args, **kwargs):
        # pandas names this aggregation ``kurt``.
        return self._rolling("kurt", *args, **kwargs)

    def rolling_window(self, *args, **kwargs):
        # Weighted windows are selected with ``win_type=...``; the mean of
        # each window is returned.
        return self._rolling("mean", *args, **kwargs)

    def cumprod(self, *args, **kwargs):
        return MySeries(self.x.cumprod(*args, **kwargs))

    def cumsum(self, *args, **kwargs):
        return MySeries(self.x.cumsum(*args, **kwargs))

    def diff(self, *args, **kwargs):
        return MySeries(self.x.diff(*args, **kwargs))

    def div(self, *args, **kwargs):
        return MySeries(self.x.div(*args, **kwargs))

    def mul(self, *args, **kwargs):
        return MySeries(self.x.mul(*args, **kwargs))

    def add(self, *args, **kwargs):
        return MySeries(self.x.add(*args, **kwargs))

    def dropna(self, *args, **kwargs):
        return MySeries(self.x.dropna(*args, **kwargs))

    def fillna(self, *args, **kwargs):
        return MySeries(self.x.fillna(*args, **kwargs))

    def floordiv(self, *args, **kwargs):
        return MySeries(self.x.floordiv(*args, **kwargs))

    def mod(self, *args, **kwargs):
        return MySeries(self.x.mod(*args, **kwargs))

    def nlargest(self, *args, **kwargs):
        return MySeries(self.x.nlargest(*args, **kwargs))

    def nonzero(self, *args, **kwargs):
        # ``Series.nonzero`` was removed from pandas; ask the underlying
        # array instead and wrap the positions of the non-zero elements.
        return MySeries(self.x.values.nonzero(*args, **kwargs)[0])

    def nsmallest(self, *args, **kwargs):
        return MySeries(self.x.nsmallest(*args, **kwargs))

    def pow(self, *args, **kwargs):
        return MySeries(self.x.pow(*args, **kwargs))

    def rank(self, *args, **kwargs):
        return MySeries(self.x.rank(*args, **kwargs))

    def round(self, *args, **kwargs):
        return MySeries(self.x.round(*args, **kwargs))

    def shift(self, *args, **kwargs):
        return MySeries(self.x.shift(*args, **kwargs))

    def sub(self, *args, **kwargs):
        return MySeries(self.x.sub(*args, **kwargs))

    def abs(self, *args, **kwargs):
        return MySeries(self.x.abs(*args, **kwargs))

    def clip(self, *args, **kwargs):
        return MySeries(self.x.clip(*args, **kwargs))

    def clip_lower(self, threshold, axis=None, inplace=False):
        # ``Series.clip_lower`` was removed from pandas; ``clip(lower=...)``
        # is its replacement.
        return MySeries(self.x.clip(lower=threshold, axis=axis,
                                    inplace=inplace))

    def clip_upper(self, threshold, axis=None, inplace=False):
        # ``Series.clip_upper`` was removed from pandas; ``clip(upper=...)``
        # is its replacement.
        return MySeries(self.x.clip(upper=threshold, axis=axis,
                                    inplace=inplace))

    def interpolate(self, *args, **kwargs):
        return MySeries(self.x.interpolate(*args, **kwargs))

    def resample(self, *args, **kwargs):
        # NOTE(review): current pandas returns a lazy Resampler here rather
        # than a Series, so wrapping it probably needs an aggregation first
        # - confirm against the callers.
        return MySeries(self.x.resample(*args, **kwargs))

    def replace(self, *args, **kwargs):
        return MySeries(self.x.replace(*args, **kwargs))
Beispiel #23
0
def pandas_fast_cusum(values: pd.Series) -> pd.Series:
    """
    Return the cumulative sum of ``values`` using the pandas built-in.

    This is O(n) and optimized with C code.
    """
    running_total = values.cumsum()
    return running_total
def pick_import_e(x):
    """Return the cumulative sum of ``x``, calling ``Series.cumsum`` unbound."""
    cumulative = Series.cumsum(x)
    return cumulative
Beispiel #25
0
# Join methods, data access methods, why data is kept separate?

from pandas import Series,DataFrame
import pandas as pd
import numpy as np

# Interactive walk-through of descriptive statistics on a Series that
# contains one missing value; the bare expressions are meant to be
# evaluated one at a time.
s = Series([2,4,8,np.nan,6])
s.sum()
s.sum(skipna = True)
s.sum(skipna = False)
s.mean()
s.var()
s.std()
s.max()
s.min()
s.cumsum()
# Index labels of the smallest / largest value, then the values themselves.
s.idxmin()
s.idxmax()
s[s.idxmin()]
s[s.idxmax()]
s.describe()
# count() and len() are shown side by side for the series with a NaN.
s.count()
len(s)


# Scores of three students (rows) in three subjects (columns).
df = DataFrame([[60,80,70],[50,75,83],[90,83,81]],index = ['홍길동','박찬호','손흥민'],columns = ['영어','수학','국어'])

# Sums with the default axis, along axis 0 and along axis 1, then the means.
df.sum()
df.sum(axis = 0)
df.sum(axis = 1)
df.mean()
Beispiel #26
0
s

s.sum()  # sums while excluding NaN
s.sum(skipna = True)

s.sum(skipna = False)  # cannot be computed when a NaN is present

s.mean()  # mean excluding NaN

s.var()  # variance excluding NaN

s.std()  # standard deviation excluding NaN

s.max()
s.min()
s.cumsum()  # cumulative sum
s.idxmin()  # index of s.min()
s.idxmax()  # index of s.max()
s[s.idxmin()]  # s.min()
s[s.idxmax()]  # s.max()

s.describe()  # R : summary()
'''
count    4.000000
mean     5.000000
std      2.581989
min      2.000000
25%      3.500000
50%      5.000000
75%      6.500000
max      8.000000
Beispiel #27
0

# In[459]:

# Show the histogram; sorted_hist comes from a cell that is not shown here.
sorted_hist


# In[460]:

# Re-index onto every integer from 0 up to the largest label of sorted_hist.
cleaned_hist = Series(sorted_hist, index=range(max(sorted_hist.index)+1))
cleaned_hist


# In[461]:

# Running totals of the histogram without the last entry.
cleaned_hist.cumsum()[:-1].values


# In[462]:

# The same running totals, relabelled 1..max(arr); arr comes from a cell
# that is not shown here.
summed_hist = Series(cleaned_hist.cumsum()[:-1].values, index=range(1, max(arr)+1))


# In[463]:

# Extend the labels to 0..max(arr) and fill the entries that have no value
# with 0.
summed_hist_cleaned = Series(summed_hist, index=range(max(arr)+1)).fillna(0)
summed_hist_cleaned


# In[194]:
Beispiel #28
0
#import matplotlib.pyplot as plt
from pandas import Series
import pandas as pd
from numpy.random import randn
# Random walk: cumulative sum of 1000 standard-normal draws on a daily index.
daily_index = pd.date_range('1/1/2000', periods=1000)
ts = Series(randn(1000), index=daily_index).cumsum()
ts.plot()
Beispiel #29
0
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

import matplotlib.pyplot as plt
# Notebook magic: render matplotlib figures inline.
get_ipython().run_line_magic('matplotlib', 'inline')

# In[2]:

# 50 evenly spaced points from 0 to 100, plotted as a line.
nd = np.linspace(0, 100, num=50)
s = Series(nd)
s.plot()

# In[4]:

# Plot the running total of the same series.
s.cumsum().plot()

# In[3]:

# 10 x 4 frame of random integers in [0, 30), rows labelled a..j and
# columns A..D.
df = DataFrame(np.random.randint(0, 30, size=(10, 4)),
               index=list('abcdefghij'),
               columns=list('ABCD'))
df

# In[4]:

# Default plot of the frame with a title.
df.plot(title='DataFrame')

# In[5]:

# The same data as a bar chart.
df.plot(kind='bar')
Beispiel #30
0
# NOTE(review): ``index`` and ``NUM_VALUES`` are defined before this excerpt;
# the lookups below presumably rely on ``index`` holding the labels
# 'a', 'b', 'c', 'f', 'h' - confirm.
print(index)

# The same evenly spaced values, once with custom labels and once with the
# default integer index.
s1 = Series(np.linspace(1, 5, NUM_VALUES), index=index)
s2 = Series(np.linspace(1, 5, NUM_VALUES))

print(s1, "\n")
print(s2, "\n")

# Select a single label ...
print(s1['f'])

# ... and several labels at once.
print(s1[['h', 'b']], "\n")

print(s1[['a', 'b', 'c']], "\n")

# Aggregations and running aggregations.
print(s1.sum(), s1.mean(), s1.min(), s1.max(), "\n")
print(s1.cumsum(), s1.cumprod(), "\n")
# ``in`` tests membership among the index labels, not the values.
print('a' in s1)
print('m' in s1)

# Element-wise arithmetic and comparison.
s3 = s1 * 10
print(s3, "\n")

print(s3 > 25, "\n")
# Overwrite every element below 25 with -1 through a boolean mask.
s3[s3 < 25] = -1
print(s3, "\n")

# Largest value, smallest value and the range between them.
s4 = Series([-0.204708, 0.478943, -0.519439])
print(s4.max(), s4.min(), s4.max() - s4.min())

from pandas import Series
# The second positional argument of Series is the index.
s = Series([5, 10, 15], ['a', 'b', 'c'])
Beispiel #31
0
def construct_charge_state_s(s_charge_rate: pd.Series,
                             time_unit: float = 0.5) -> pd.Series:
    """Turn a series of charge rates into a series of charge states.

    The rates are accumulated and the running total is divided by
    ``1 / time_unit``.
    """
    periods_per_unit = 1 / time_unit
    accumulated_rate = s_charge_rate.cumsum()
    return accumulated_rate.divide(periods_per_unit)
Beispiel #32
0
def countingsort(arr):
    """Sort ``arr`` with a counting sort, illustrated through DataFrames.

    A histogram of the input is plotted first.  The counts per value are
    accumulated into the auxiliary array C, and three DataFrames (A: input,
    C: auxiliary counts, B: output) are built with one row per step of the
    algorithm.  Only the sorted values are returned.

    NOTE(review): values of ``arr`` are used as positions into C, so
    non-negative integers are presumably required - confirm.

    Returns:
        list with the values of ``arr`` in the order given by B.
    """

    # Convert to a pandas Series to build the histogram
    hist = Series(arr)
    sns.distplot(hist,kde=False,rug=True,color='royalblue', bins=max(hist.index)*3, label=r'Häufigkeit')
    plt.ylim(0, max(hist.index)+1)
    plt.xlabel('Element')
    plt.ylabel(r'Häufigkeit')
    plt.title(r'$\mathrm{Array\ Histogram}$')
    plt.show()



    # Histogram sorted by value; missing values (NaN) are replaced by 0
    sorted_hist = hist.value_counts().sort_index()
    cleaned_hist = Series(sorted_hist, index=range(max(sorted_hist.index)+1)).fillna(0)


    # Cumulative sum of the histogram counts
    summed_hist = Series(cleaned_hist.cumsum()[:-1].values, index=range(1, max(arr)+1))

    # Clean up the cumulative histogram
    summed_hist_cleaned = Series(summed_hist, index=range(max(arr)+1)).fillna(0)




    # Create DataFrames for A, B and the auxiliary array C

    # DataFrame A
    rows = len(arr)            # number of rows
    columns_A = []             # column labels of A

    # Name the columns of A
    for num in range(rows):
        columns_A.append('A[' + str(num) + ']')

    # Build the DataFrame: every row is a copy of the input
    dframe_A = DataFrame(np.array(list(arr)*rows).reshape(rows, rows), columns=columns_A, index=range(rows))


    # The same again for the auxiliary array C
    hilfs_array = np.array(summed_hist_cleaned.values)
    columns = len(hilfs_array)
    columns_C = []
    for num in range(columns):
        columns_C.append('C[' + str(num) + ']')
    dframe_C = DataFrame(np.array(list(hilfs_array)*rows).reshape(rows ,columns), index=range(rows), columns=columns_C)


    # Start with an EMPTY DataFrame B
    columns_B = []
    for num in range(rows):
        columns_B.append('B[' + str(num) + ']')

    dframe_B = DataFrame(np.nan, index=range(rows),columns=columns_B).fillna(' ')


    # Build a dict holding the keys and values of the final sorted array B
    b = {}
    lookup_value = 0
    for i in range(rows):
        # Increment the value in C once it has been looked up via A
        if i > 0:
            dframe_C['C['+str(lookup_value)+']'][i:] += 1

        # Value to look up in C and to insert into B at position C[A[i]]
        lookup_value = dframe_A.values[i][i]
        key = 'B[' + str(int(dframe_C.values[i][lookup_value])) + ']'
        b[key] = [lookup_value,i]


    # Finally insert the values into B in sorted order
    for key, value in b.items():
        dframe_B[key][value[1]:] = value[0]

    # Concatenate the three DataFrames
    final_dframe = pd.concat([dframe_A, dframe_C, dframe_B], axis=1)


    # Sorted array B
    result = []
    for i in range(len(b)):
        result.append(b['B[' + str(i) + ']'][0])

    #print('\nDataFrame A\n')
    #display(dframe_A)
    #print('\nDataFrame C\n')
    #display(dframe_C)
    #print('\nDataFrame B\n')
    #display(dframe_B)

    #display(final_dframe)

    return result
# **Q10**: What is
# 
# ```Python
# s1.apply(lambda k: 2*k).sum()
# ```

# <markdowncell>

# **A10**:
# <pre>
# 10
# </pre>

# <codecell>

# Entry selected by key 3 from the running total of s1 (s1 is defined in an
# earlier cell).
s1.cumsum()[3]

# <markdowncell>

# **Q11**: What is 
# 
# ```Python
#     s1.cumsum()[3]
# ```

# <markdowncell>

# **A11**:
# <pre>
# 2
# </pre>
Beispiel #34
0
#!/usr/bin/env python
# -*- coding: utf-8; -*-
# 01_matplotlib_basic_01.py - basic plot from pandas.pdf
# Copyright(C). masaru.charlie, 2015. All rights reserved.
#

from pandas import Series, DataFrame, date_range
from numpy.random import *
from datetime import datetime
import matplotlib.pyplot as plt

# Fixed seed so the random walks below are reproducible.
seed(123456)

## basic 1: random walk of a single series on a daily index
daily_index = date_range('1/1/2000', periods=1000)
ts = Series(randn(1000), index=daily_index).cumsum()
ts.plot()
plt.show()

## basic 2: four random walks sharing that index
df = DataFrame(randn(1000, 4), index=ts.index, columns=list('ABCD')).cumsum()
df.plot(use_index=True)
plt.show()

## basic 3: plot one column against another
df3 = DataFrame(randn(1000, 2), columns=['B', 'C']).cumsum()
row_numbers = list(range(len(df3)))
df3['A'] = Series(row_numbers)
df3.plot(x='A', y='B')
plt.show()