def generalized_kolmogorov_smirnov(instance: Series, tag_list: Series, p: float = 1):
    """Random walk / running sums statistic of GSEA, generalised KS, with p=0
    kind-of equivalent to normal KS (see proof in GSEA 2005 paper supplement)
    """
    ranked_list = instance.rank(ascending=False) + 1
    n = len(ranked_list)
    t = len(tag_list)
    decrement = 1 / (n - t)
    hit_denominator = ranked_list[tag_list.index].pow(p).sum()

    running_sum_statistic_hits = Series(data=0, index=instance.index)
    running_sum_statistic_hits[
        tag_list.index] = ranked_list[tag_list.index].pow(p) / hit_denominator
    running_sum_statistic_hits = running_sum_statistic_hits.cumsum()

    running_sum_statistic_misses = Series(data=decrement, index=instance.index)
    running_sum_statistic_misses[tag_list.index] = 0
    running_sum_statistic_misses = running_sum_statistic_misses.cumsum()

    return -max(running_sum_statistic_hits - running_sum_statistic_misses, key=abs)
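# Hedged usage sketch for generalized_kolmogorov_smirnov above. The gene names
# and scores are invented for illustration; the function and
# `from pandas import Series` are assumed to be in scope.
from pandas import Series

scores = Series([2.5, 1.8, 0.9, 0.3, -0.2, -1.1, -2.0],
                index=['g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'g7'])
gene_set = Series(1, index=['g1', 'g3', 'g6'])  # the "tag list": members of the set

# Negated extreme deviation of the hit/miss running-sum difference.
print(generalized_kolmogorov_smirnov(scores, gene_set, p=1))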
def get_abs_max_drawdown(profit_results: Series) -> float:
    max_drawdown_df = DataFrame()
    max_drawdown_df['cumulative'] = profit_results.cumsum()
    max_drawdown_df['high_value'] = max_drawdown_df['cumulative'].cummax()
    max_drawdown_df['drawdown'] = max_drawdown_df['cumulative'] - max_drawdown_df['high_value']
    return abs(max_drawdown_df['drawdown'].min())
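# Minimal, hypothetical check of get_abs_max_drawdown; the per-trade profit
# numbers below are made up.
import pandas as pd

profits = pd.Series([1.0, -2.0, 0.5, -1.5, 3.0])
# cumulative: 1.0, -1.0, -0.5, -2.0, 1.0; the running high stays at 1.0,
# so the deepest drawdown is -3.0 and the function returns its absolute value.
print(get_abs_max_drawdown(profits))  # 3.0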
def location(self):
    ser = Series([], dtype=float)
    # dict.values() is not sliceable in Python 3, so materialise it as a list first.
    for x in list(val_segment_dict.values())[self.start:self.end]:
        # for x in val_segment_dict.get(key, []):
        ser = pd.concat([ser, Series([len(x)])], ignore_index=True)
    return ser.values, ser.cumsum().values
def start_date(
    confirmed_cases: pd.Series,
    gt_distribution: np.ndarray,
    r_window_size: Optional[int] = None,
) -> date:
    """
    determine start date of reliable R-estimation:
    3 conditions to be fulfilled according to [Cori et al., 2013]
    condition 1: cumulative cases have reached at least 12
    condition 2: at least one mean generation time after index case
    condition 3: if applicable: at least one r_window_size after index case
    """
    cumulative_cases = confirmed_cases.cumsum()
    over_dozen_cases_date = cumulative_cases.ge(12).idxmax()
    index_case_date = confirmed_cases.ne(0).idxmax()
    mean_gt = gt_distribution @ np.arange(len(gt_distribution)) / gt_distribution.sum()
    r_window_size = r_window_size if isinstance(r_window_size, int) else 0

    condition1_date = over_dozen_cases_date
    condition2_date = index_case_date + timedelta(days=np.ceil(mean_gt))
    condition3_date = index_case_date + timedelta(days=r_window_size)

    first_true_date = max(condition1_date, condition2_date, condition3_date)
    return first_true_date
def get_exp_ratio_idx(data: pd.Series, exp_ratio):
    # Normalise to the cumulative fraction of the total.
    data = data.cumsum() / data.sum()
    exp_ident = []
    for ratio in exp_ratio:
        # First index at which the cumulative fraction exceeds the ratio.
        ident = data[data > ratio].index[0]
        exp_ident.append(ident)
    return data, exp_ident
def _add_gross_and_costs(gross: pd.Series, costs: pd.Series):
    cumsum_costs = costs.cumsum()
    cumsum_costs_aligned = cumsum_costs.reindex(gross.index, method="ffill")
    costs_aligned = cumsum_costs_aligned.diff()
    net = gross + costs_aligned
    return net
def get_precision_recall(
        data: pd.Series,
        ties: Optional[np.ndarray] = None) -> Tuple[pd.Series, pd.Series]:
    r = np.arange(1, data.shape[0] + 1)
    c = data.cumsum()
    if ties is not None:
        return fix_tied(ties, c / r), fix_tied(ties, c / data.sum())
    return (c / r), (c / data.sum())
def cumulative_moving_avg_v3(values: pd.Series, window: int = 5) -> pd.Series:
    '''
    Cumulative Moving Average
    '''
    # denominator is one-indexed location of the element in the cumsum
    denominator = pd.Series(np.arange(1, values.shape[0] + 1), index=values.index)
    result = values.cumsum() / denominator
    # Set the first window - 1 elements to nan
    result.iloc[:(window - 1)] = np.nan
    return result
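# Hedged example for cumulative_moving_avg_v3: with window=3 the first two
# outputs are NaN, the rest are running means over everything seen so far.
import pandas as pd

vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
print(cumulative_moving_avg_v3(vals, window=3))
# expected: NaN, NaN, 2.0, 2.5, 3.0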
def get_ica_components(X, contribution=0.85):
    X_ica = FastICA(n_components=len(X.columns)).fit(X)
    L2 = Series(np.sum(X_ica.mixing_ ** 2, axis=0))
    L2.sort_values(ascending=False, inplace=True)
    X_S = DataFrame(X_ica.transform(X))
    X_ica_mixing_ = DataFrame(X_ica.mixing_)
    L2.drop(L2.index[L2.cumsum() / L2.sum() >= contribution][1:], inplace=True)
    return X_S.reindex(columns=L2.index).values, X_ica_mixing_.reindex(
        columns=L2.index).values, X_ica.mean_, len(L2)
def vwap(prices: pd.Series, volume: pd.Series) -> pd.Series:
    """
    df['vwap'] = (np.cumsum(df.quantity * df.price) / np.cumsum(df.quantity))

    vwap = daily volume-weighted average price
    :param prices:
    :param volume:
    :return:
    """
    if isinstance(prices.index, pd.MultiIndex):
        return (volume * prices).groupby(level=1).cumsum() / volume.groupby(
            level=1).cumsum()
    else:
        return (volume * prices).cumsum() / volume.cumsum()
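# Illustrative call of vwap on a plain (non-MultiIndex) index; the prices and
# volumes are fabricated.
import pandas as pd

prices = pd.Series([10.0, 11.0, 12.0])
volume = pd.Series([100, 200, 100])
print(vwap(prices, volume))
# running VWAP: 10.0, 10.666..., 11.0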
def get_nome_representacao_do_grupo_v2(df2):
    sentences = [sent for sent in df2['DS_ITEM_CLEAN']]
    sentences_set = set(sentences)
    if len(sentences_set) == 1:  # i.e., all items are identical
        representacao_grupo = [word for word in sentences_set][0].split()
    else:
        sentences2 = [sent.split() for sent in df2['DS_ITEM_CLEAN']]
        flat_sentences2 = [item for sublist in sentences2 for item in sublist]
        word_counter = collections.Counter(flat_sentences2)
        word_counter = Series(word_counter).sort_values(ascending=False)
        word_counter = DataFrame(word_counter)
        word_counter.columns = ['word']
        word_counter['cumsum'] = word_counter.cumsum().pct_change().replace(
            np.nan, 1)
        word_position = {}
        for word in word_counter.index:
            word_position[word] = [
                frase.index(word)
                for frase in [sentence for sentence in sentences2]
                if word in frase
            ]
        word_position = Series(word_position)
        word_position = word_position.apply(lambda x: np.array(x)).apply(
            lambda x: np.mean(x))
        word_counter['position'] = word_position
        word_counter = word_counter.sort_values('position')
        word_counter = word_counter[word_counter['cumsum'] >= 0.15]  # to ignore useless words
        representacao_grupo = list(word_counter.index)
        # palavras_grupo = [word for word in word_counter.index]
        # primeiras_palavras = [word for word in palavras_grupo if ((word not in unidades) and (not word.isdigit()))]
        # if len(primeiras_palavras) > 1:  # if there is more than one word, swap the order:
        #     primeira = primeiras_palavras[0]
        #     segunda = primeiras_palavras[1]
        #     if len(primeiras_palavras) > 2:
        #         primeiras_palavras = [segunda, primeira] + primeiras_palavras[2:]
        #     else:
        #         primeiras_palavras = [segunda, primeira]
        # meio_palavras = [word for word in palavras_grupo if word.isdigit()]
        # ultimas_palavras = [word for word in palavras_grupo if word in unidades]
        # representacao_grupo = primeiras_palavras + meio_palavras + ultimas_palavras
        # representacao_grupo = []
        # for palavra in palavras_grupo:
        #     representacao_grupo.append(palavra)
        #     if palavra in unidades:
        #         break
    return representacao_grupo
def minmax_rank_based(values, sensitivity):
    """
    Give range to check outliers

    minmax_rank_based(
        [1, 2, 6, 4, ...],
        SENSITIVITY
    )

    (1, 17)
    """
    # Order by rank on number of instances
    value_counts = Series(values).value_counts()
    # Cumulative sum
    cumsum = value_counts.cumsum()
    # Extract the subset of values whose cumulative count covers up to
    # SENSITIVITY*100 % of the observations
    typicalValues = cumsum[cumsum <= sensitivity * value_counts.sum()]
    # Range obtained
    return min(typicalValues.index), max(typicalValues.index)
def lorenz_curve(data: pd.Series) -> pd.Series:
    """
    Calculates the values for the lorenz curve of the data.

    For more information see online `lorenz curve <https://en.wikipedia.org/wiki/Lorenz_curve>`_.

    Args:
        data: sorted series to calculate the lorenz curve for

    Returns:
        the values of the lorenz curve as a series
    """
    scaled_prefix_sum = data.cumsum() / data.sum()
    return tp.cast(pd.Series, scaled_prefix_sum)
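# Minimal sketch of lorenz_curve usage (assumes the module imports pandas as pd
# and typing as tp, as the annotations above suggest). Per the docstring, the
# input series should already be sorted ascending.
import pandas as pd

incomes = pd.Series([1, 2, 3, 4]).sort_values()
print(lorenz_curve(incomes))
# cumulative shares: 0.1, 0.3, 0.6, 1.0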
def moving_avg_v3(values: pd.Series, window: int = 20) -> pd.Series:
    '''
    This is an O(n) time implementation of a simple moving average.

    It appears shift(window) starts at window + 0. For example, with a window of
    20 and a zero-based index, shift was expected to start at index 19 but
    started at index 20, the 21st position. The workaround may be costly.
    '''
    original_index = values.index.copy()
    cumsum = values.cumsum()
    cumsum = pd.concat([pd.Series(0, name=values.name), cumsum])
    mvg_avg = ((cumsum - cumsum.shift(window)) / window)
    mvg_avg = mvg_avg.iloc[1:]
    mvg_avg.index = original_index
    return mvg_avg
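# Hedged sanity check of moving_avg_v3 against pandas' built-in rolling mean;
# the input values are arbitrary.
import numpy as np
import pandas as pd

x = pd.Series(np.arange(1.0, 11.0))
fast = moving_avg_v3(x, window=3)
ref = x.rolling(window=3).mean()
print(np.allclose(fast.dropna().values, ref.dropna().values))  # expect True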
def cumseries_continuous(returns: pd.Series) -> pd.Series:
    """Performs continuous compounding to create cumulative index series.

    e.g. if there are 3 returns r1, r2, r3, calculate
        exp(r1) - 1
        exp(r1 + r2) - 1
        exp(r1 + r2 + r3) - 1

    Args:
        returns: pandas series of returns, in decimals. i.e. 3% should be
            expressed as 0.03, not 3.

    Returns:
        returns: pandas series of cumulative index, in decimals.
    """
    return returns.cumsum().apply(lambda x: math.exp(x) - 1)
def cumseries_arithmetic(returns: pd.Series) -> pd.Series:
    """Performs arithmetic compounding to create cumulative index series.

    e.g. if there are 3 returns r1, r2, r3, calculate
        r1
        r1 + r2
        r1 + r2 + r3

    Args:
        returns: pandas series of returns, in decimals. i.e. 3% should be
            expressed as 0.03, not 3.

    Returns:
        returns: pandas series of cumulative index, in decimals.
    """
    return returns.cumsum()
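# Quick comparison of the two compounding helpers above, assuming both
# cumseries_arithmetic and cumseries_continuous (and its math import) are in
# scope; the returns are invented.
import pandas as pd

r = pd.Series([0.01, 0.02, -0.005])
print(cumseries_arithmetic(r))  # 0.01, 0.03, 0.025
print(cumseries_continuous(r))  # exp(0.01)-1, exp(0.03)-1, exp(0.025)-1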
def pandas_series_demo03():
    # operation on series
    s = Series(range(4), index=['a', 'b', 'c', 'd'])
    print('series, items > 1:\n', s[s > 1])
    print('series, items * 2:\n', s * 2)
    print('square series:\n', np.square(s))
    print('series, cumulative sum:\n', s.cumsum())

    # elements with matching index values are added together
    dict_data = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
    s1 = Series(dict_data)
    states = ['California', 'Ohio', 'Oregon', 'Texas']
    s2 = Series(dict_data, index=states)
    s3 = s1 + s2
    print('\nseries1 + series2:\n', s3)

    s3.index.name = 'state'
    s3.name = 'population'
    print('\nseries with name:\n', s3)
def predictions_better(ts_data, window_size, should_plot=True):
    results_ARIMA = model_combined_no_log(ts_data, window_size, True)

    # Make a series with cumulative fitted values
    predictions_ARIMA_diff = Series(results_ARIMA.fittedvalues, copy=True)
    predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()

    # Make a series with combined original and cumulative fitted values
    # (.ix was removed in recent pandas, so use .iloc for positional access)
    predictions_ARIMA = Series(ts_data.iloc[0], index=ts_data.index)
    # predictions_ARIMA = predictions_ARIMA.add(predictions_ARIMA_diff, fill_value=0)
    predictions_ARIMA = predictions_ARIMA.add(predictions_ARIMA_diff_cumsum, fill_value=0)

    if should_plot:
        pyplot.figure(2)
        pyplot.plot(ts_data, color="blue", label="Original")
        pyplot.plot(predictions_ARIMA, color="green", label="Prediction")
        pyplot.legend(loc="best")
        pyplot.title("Predictions")
        pyplot.show(block=False)
def get_nome_representacao_do_grupo(df2):
    sentences = [sent for sent in df2['DS_ITEM_CLEAN']]
    sentences_set = set(sentences)
    if len(sentences_set) == 1:  # i.e., all items are identical
        representacao_grupo = [word for word in sentences_set][0].split()
    else:
        sentences2 = [sent.split() for sent in df2['DS_ITEM_CLEAN']]
        flat_sentences2 = [item for sublist in sentences2 for item in sublist]
        word_counter = collections.Counter(flat_sentences2)
        word_counter = Series(word_counter).sort_values(ascending=False)
        word_counter = word_counter.cumsum().pct_change()
        word_counter = word_counter.replace(np.nan, 1)
        word_counter = word_counter[word_counter >= 0.15]  # to ignore useless words
        # word_counter = word_counter[word_counter >= 0.20]  # to ignore useless words
        # word_counter = word_counter[word_counter >= 0.05]  # to ignore useless words
        palavras_grupo = [word for word in word_counter.index]
        primeiras_palavras = [
            word for word in palavras_grupo
            if ((word not in unidades) and (not word.isdigit()))
        ]
        if len(primeiras_palavras) > 1:  # if there is more than one word, swap the order:
            primeira = primeiras_palavras[0]
            segunda = primeiras_palavras[1]
            if len(primeiras_palavras) > 2:
                primeiras_palavras = [segunda, primeira] + primeiras_palavras[2:]
            else:
                primeiras_palavras = [segunda, primeira]
        meio_palavras = [word for word in palavras_grupo if word.isdigit()]
        ultimas_palavras = [
            word for word in palavras_grupo if word in unidades
        ]
        representacao_grupo = primeiras_palavras + meio_palavras + ultimas_palavras
        # representacao_grupo = []
        # for palavra in palavras_grupo:
        #     representacao_grupo.append(palavra)
        #     if palavra in unidades:
        #         break
    return representacao_grupo
def residual_days(
    consumption_series: pd.Series,
    residual_weight: float,
    threshold: float = 0.0,
    forecast_size: int = None,
) -> int:
    """
    Calculate and return the remaining days of consumption until all residual product is consumed.

    Three possible return values:
    1. True value, if residual days is in range of consumption series
    2. forecast size + 1, if residual days is beyond range in consumption series
       and forecast_size is provided
    3. -1, if residual days is beyond range in consumption series and
       forecast_size is not provided

    Args:
        consumption_series: Consumption values as a pandas Series.
        residual_weight: Residual weight remaining.
        threshold: Threshold of 'empty' stock. Typically 0 or the average daily consumption (oz.).
        forecast_size: Size of forecast.

    Returns:
        Remaining days of consumption.
    """
    residual_weight = residual_weight - threshold
    start_timestamp = consumption_series.index[0]
    consumption_cumsum = (consumption_series.cumsum() - consumption_series.values[0]
                          )  # remove consumption incurred before timestamp 0
    threshold_crossed = consumption_cumsum.ge(
        residual_weight)  # boolean array, True where cumsum >= residual weight

    if True in threshold_crossed.values:
        residual_end_timestamp = consumption_cumsum[threshold_crossed].index[0]  # first True case
        days_remaining = pd.Timedelta(residual_end_timestamp -
                                      start_timestamp) / pd.Timedelta("1D")
    elif forecast_size:
        days_remaining = forecast_size + 1
    else:
        days_remaining = -1

    return days_remaining
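# Hypothetical daily-consumption example for residual_days; the dates and
# weights are fabricated. With 5 oz left and 2 oz consumed per day, the
# cumulative consumption (measured from the first timestamp) first reaches
# 5 oz after 3 days.
import pandas as pd

idx = pd.date_range("2021-01-01", periods=5, freq="D")
consumption = pd.Series([2.0, 2.0, 2.0, 2.0, 2.0], index=idx)
print(residual_days(consumption, residual_weight=5.0))  # 3.0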
def get_cdf(raw_cnt: pd.Series) -> pd.DataFrame:
    """Compute CDF and related metrics from a Pandas Series.

    Typically used in conjunction with `stats_df`.

    >>> stats_df = stats_by(df, by='item', agg_funcs={'date': 'nunique'}, rename={'date': 'day_cnt'})
    >>> stats_df
          day_cnt
    item
    ab          1
    cd          1
    ef          2
    >>> raw_cnt_df = stats_df['day_cnt'].reset_index().groupby(by='day_cnt').count()
    >>> raw_cnt_df
             item
    day_cnt
    1           2
    2           1
    >>> cdf_df = get_cdf(raw_cnt_df.iloc[:, 0])
    >>> cdf_df
             count       cdf  cum_sum  rhs
    day_cnt
    1            2  0.666667        2    1
    2            1  1.000000        3    0
    """
    # CDF
    cum_sum = raw_cnt.cumsum()
    cdf = cum_sum / cum_sum.iloc[-1]
    # Right-hand side
    rhs = cum_sum.iloc[-1] - cum_sum
    return pd.DataFrame({
        "count": raw_cnt,
        "cdf": cdf,
        "cum_sum": cum_sum,
        "rhs": rhs
    })
class MySeries:
    def __init__(self, *args, **kwargs):
        self.x = Series(*args, **kwargs)
        self.values = self.x.values
        self.index = self.x.index

    def rolling_mean(self, *args, **kwargs):
        return MySeries(pd.rolling_mean(self.x, *args, **kwargs))
    def rolling_count(self, *args, **kwargs):
        return MySeries(pd.rolling_count(self.x, *args, **kwargs))
    def rolling_sum(self, *args, **kwargs):
        return MySeries(pd.rolling_sum(self.x, *args, **kwargs))
    def rolling_median(self, *args, **kwargs):
        return MySeries(pd.rolling_median(self.x, *args, **kwargs))
    def rolling_min(self, *args, **kwargs):
        return MySeries(pd.rolling_min(self.x, *args, **kwargs))
    def rolling_max(self, *args, **kwargs):
        return MySeries(pd.rolling_max(self.x, *args, **kwargs))
    def rolling_std(self, *args, **kwargs):
        return MySeries(pd.rolling_std(self.x, *args, **kwargs))
    def rolling_var(self, *args, **kwargs):
        return MySeries(pd.rolling_var(self.x, *args, **kwargs))
    def rolling_skew(self, *args, **kwargs):
        return MySeries(pd.rolling_skew(self.x, *args, **kwargs))
    def rolling_kurtosis(self, *args, **kwargs):
        return MySeries(pd.rolling_kurtosis(self.x, *args, **kwargs))
    def rolling_window(self, *args, **kwargs):
        return MySeries(pd.rolling_window(self.x, *args, **kwargs))
    def cumprod(self, *args, **kwargs):
        return MySeries(self.x.cumprod(*args, **kwargs))
    def cumsum(self, *args, **kwargs):
        return MySeries(self.x.cumsum(*args, **kwargs))
    def diff(self, *args, **kwargs):
        return MySeries(self.x.diff(*args, **kwargs))
    def div(self, *args, **kwargs):
        return MySeries(self.x.div(*args, **kwargs))
    def mul(self, *args, **kwargs):
        return MySeries(self.x.mul(*args, **kwargs))
    def add(self, *args, **kwargs):
        return MySeries(self.x.add(*args, **kwargs))
    def dropna(self, *args, **kwargs):
        return MySeries(self.x.dropna(*args, **kwargs))
    def fillna(self, *args, **kwargs):
        return MySeries(self.x.fillna(*args, **kwargs))
    def floordiv(self, *args, **kwargs):
        return MySeries(self.x.floordiv(*args, **kwargs))
    def mod(self, *args, **kwargs):
        return MySeries(self.x.mod(*args, **kwargs))
    def nlargest(self, *args, **kwargs):
        return MySeries(self.x.nlargest(*args, **kwargs))
    def nonzero(self, *args, **kwargs):
        return MySeries(self.x.nonzero(*args, **kwargs))
    def nsmallest(self, *args, **kwargs):
        return MySeries(self.x.nsmallest(*args, **kwargs))
    def pow(self, *args, **kwargs):
        return MySeries(self.x.pow(*args, **kwargs))
    def rank(self, *args, **kwargs):
        return MySeries(self.x.rank(*args, **kwargs))
    def round(self, *args, **kwargs):
        return MySeries(self.x.round(*args, **kwargs))
    def shift(self, *args, **kwargs):
        return MySeries(self.x.shift(*args, **kwargs))
    def sub(self, *args, **kwargs):
        return MySeries(self.x.sub(*args, **kwargs))
    def abs(self, *args, **kwargs):
        return MySeries(self.x.abs(*args, **kwargs))
    def clip(self, *args, **kwargs):
        return MySeries(self.x.clip(*args, **kwargs))
    def clip_lower(self, *args, **kwargs):
        return MySeries(self.x.clip_lower(*args, **kwargs))
    def clip_upper(self, *args, **kwargs):
        return MySeries(self.x.clip_upper(*args, **kwargs))
    def interpolate(self, *args, **kwargs):
        return MySeries(self.x.interpolate(*args, **kwargs))
    def resample(self, *args, **kwargs):
        return MySeries(self.x.resample(*args, **kwargs))
    def replace(self, *args, **kwargs):
        return MySeries(self.x.replace(*args, **kwargs))
def pandas_fast_cusum(values: pd.Series) -> pd.Series:
    """
    This is O(n) and optimized with C code
    """
    return values.cumsum()
def pick_import_e(x):
    return Series.cumsum(x)
# Join methods, data access methods, and why split the data?
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

s = Series([2, 4, 8, np.nan, 6])
s.sum()
s.sum(skipna=True)
s.sum(skipna=False)
s.mean()
s.var()
s.std()
s.max()
s.min()
s.cumsum()
s.idxmin()
s.idxmax()
s[s.idxmin()]
s[s.idxmax()]
s.describe()
s.count()
len(s)

df = DataFrame([[60, 80, 70], [50, 75, 83], [90, 83, 81]],
               index=['홍길동', '박찬호', '손흥민'],
               columns=['영어', '수학', '국어'])  # columns: English, Math, Korean
df.sum()
df.sum(axis=0)
df.sum(axis=1)
df.mean()
s
s.sum()              # sums, excluding NaN
s.sum(skipna=True)
s.sum(skipna=False)  # cannot be computed if a NaN is present
s.mean()             # mean, excluding NaN
s.var()              # variance, excluding NaN
s.std()              # standard deviation, excluding NaN
s.max()
s.min()
s.cumsum()           # cumulative sum
s.idxmin()           # index of s.min()
s.idxmax()           # index of s.max()
s[s.idxmin()]        # s.min()
s[s.idxmax()]        # s.max()
s.describe()         # R: summary()
'''
count    4.000000
mean     5.000000
std      2.581989
min      2.000000
25%      3.500000
50%      5.000000
75%      6.500000
max      8.000000
# In[459]:

sorted_hist

# In[460]:

cleaned_hist = Series(sorted_hist, index=range(max(sorted_hist.index) + 1))
cleaned_hist

# In[461]:

cleaned_hist.cumsum()[:-1].values

# In[462]:

summed_hist = Series(cleaned_hist.cumsum()[:-1].values, index=range(1, max(arr) + 1))

# In[463]:

summed_hist_cleaned = Series(summed_hist, index=range(max(arr) + 1)).fillna(0)
summed_hist_cleaned

# In[194]:
# import matplotlib.pyplot as plt
from pandas import Series
import pandas as pd
from numpy.random import randn

ts = Series(randn(1000), index=pd.date_range('1/1/2000', periods=1000))
ts = ts.cumsum()
ts.plot()
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# In[2]:

nd = np.linspace(0, 100, num=50)
s = Series(nd)
s.plot()

# In[4]:

s.cumsum().plot()

# In[3]:

df = DataFrame(np.random.randint(0, 30, size=(10, 4)),
               index=list('abcdefghij'),
               columns=list('ABCD'))
df

# In[4]:

df.plot(title='DataFrame')

# In[5]:

df.plot(kind='bar')
print(index)

s1 = Series(np.linspace(1, 5, NUM_VALUES), index=index)
s2 = Series(np.linspace(1, 5, NUM_VALUES))
print(s1, "\n")
print(s2, "\n")
print(s1['f'])
print(s1[['h', 'b']], "\n")
print(s1[['a', 'b', 'c']], "\n")
print(s1.sum(), s1.mean(), s1.min(), s1.max(), "\n")
print(s1.cumsum(), s1.cumprod(), "\n")
print('a' in s1)
print('m' in s1)

s3 = s1 * 10
print(s3, "\n")
print(s3 > 25, "\n")
s3[s3 < 25] = -1
print(s3, "\n")

s4 = Series([-0.204708, 0.478943, -0.519439])
print(s4.max(), s4.min(), s4.max() - s4.min())

from pandas import Series

s = Series([5, 10, 15], ['a', 'b', 'c'])
def construct_charge_state_s(s_charge_rate: pd.Series,
                             time_unit: float = 0.5) -> pd.Series:
    s_charge_state = (s_charge_rate.cumsum().divide(1 / time_unit))
    return s_charge_state
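# Sketch of construct_charge_state_s with half-hourly data (the default
# time_unit=0.5); the charge-rate values are made up. Dividing the cumulative
# sum by 1/time_unit is the same as multiplying by the interval length, so the
# rates integrate into a state of charge.
import pandas as pd

s_charge_rate = pd.Series([2.0, 2.0, 4.0])  # e.g. kW per half-hour slot
print(construct_charge_state_s(s_charge_rate))  # 1.0, 2.0, 4.0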
def countingsort(arr):
    # Convert to a pandas Series to build the histogram
    hist = Series(arr)
    sns.distplot(hist, kde=False, rug=True, color='royalblue',
                 bins=max(hist.index) * 3, label=r'Häufigkeit')
    plt.ylim(0, max(hist.index) + 1)
    plt.xlabel('Element')
    plt.ylabel(r'Häufigkeit')
    plt.title(r'$\mathrm{Array\ Histogram}$')
    plt.show()

    # Sorted histogram; missing values (NaN) are replaced with 0
    sorted_hist = hist.value_counts().sort_index()
    cleaned_hist = Series(sorted_hist, index=range(max(sorted_hist.index) + 1)).fillna(0)

    # Cumulative sum of the histogram values
    summed_hist = Series(cleaned_hist.cumsum()[:-1].values, index=range(1, max(arr) + 1))

    # Clean up the summed histogram
    summed_hist_cleaned = Series(summed_hist, index=range(max(arr) + 1)).fillna(0)

    # Create DataFrames for A, B and the auxiliary array C
    # DataFrame A
    rows = len(arr)   # number of rows
    columns_A = []    # column labels

    # Name the columns of A
    for num in range(rows):
        columns_A.append('A[' + str(num) + ']')

    # Create DataFrame
    dframe_A = DataFrame(np.array(list(arr) * rows).reshape(rows, rows),
                         columns=columns_A, index=range(rows))

    # The same now for the auxiliary array C
    hilfs_array = np.array(summed_hist_cleaned.values)
    columns = len(hilfs_array)
    columns_C = []
    for num in range(columns):
        columns_C.append('C[' + str(num) + ']')
    dframe_C = DataFrame(np.array(list(hilfs_array) * rows).reshape(rows, columns),
                         index=range(rows), columns=columns_C)

    # First create an EMPTY DataFrame B
    columns_B = []
    for num in range(rows):
        columns_B.append('B[' + str(num) + ']')
    dframe_B = DataFrame(np.nan, index=range(rows), columns=columns_B).fillna(' ')

    # Create a dict that stores the keys and values for the eventually sorted array B
    b = {}
    lookup_value = 0
    for i in range(rows):
        # Increment the values in C once they have been looked up in A
        if i > 0:
            dframe_C['C[' + str(lookup_value) + ']'][i:] += 1
        # Value looked up in C and inserted into B at position C[A[i]]
        lookup_value = dframe_A.values[i][i]
        key = 'B[' + str(int(dframe_C.values[i][lookup_value])) + ']'
        b[key] = [lookup_value, i]

    # Finally insert the values into B in sorted order
    for key, value in b.items():
        dframe_B[key][value[1]:] = value[0]

    # Concatenate the 3 DataFrames
    final_dframe = pd.concat([dframe_A, dframe_C, dframe_B], axis=1)

    # Sorted array B
    result = []
    for i in range(len(b)):
        result.append(b['B[' + str(i) + ']'][0])

    # print('\nDataFrame A\n')
    # display(dframe_A)
    # print('\nDataFrame C\n')
    # display(dframe_C)
    # print('\nDataFrame B\n')
    # display(dframe_B)
    # display(final_dframe)
    return result
# **Q10**: What is
#
# ```Python
# s1.apply(lambda k: 2*k).sum()
# ```

# <markdowncell>

# **A10**:
# <pre>
# 10
# </pre>

# <codecell>

s1.cumsum()[3]

# <markdowncell>

# **Q11**: What is
#
# ```Python
# s1.cumsum()[3]
# ```

# <markdowncell>

# **A11**:
# <pre>
# 2
# </pre>
#!/usr/bin/env python
# -*- coding: utf-8; -*-
# 01_matplotlib_basic_01.py - basic plot from pandas.pdf
# Copyright(C). masaru.charlie, 2015. All rights reserved.
#
from pandas import Series, DataFrame, date_range
from numpy.random import *
from datetime import datetime
import matplotlib.pyplot as plt

seed(123456)

## basic 1
ts = Series(randn(1000), index=date_range('1/1/2000', periods=1000))
ts = ts.cumsum()
ts.plot()
plt.show()

## basic 2
df = DataFrame(randn(1000, 4), index=ts.index, columns=list('ABCD'))
df = df.cumsum()
df.plot(use_index=True)
plt.show()

## basic 3
df3 = DataFrame(randn(1000, 2), columns=['B', 'C']).cumsum()
df3['A'] = Series(list(range(len(df3))))
df3.plot(x='A', y='B')
plt.show()