def test_swapaxes(self):
    """Check ``DataFrame.swapaxes`` against ``.T`` and its invalid-axis error."""
    frame = DataFrame(np.random.randn(10, 5))
    transposed = frame.T

    # Swapping the two axes in either order must equal the transpose.
    for first, second in ((0, 1), (1, 0)):
        tm.assert_frame_equal(transposed, frame.swapaxes(first, second))

    # Swapping an axis with itself leaves the frame unchanged.
    tm.assert_frame_equal(frame, frame.swapaxes(0, 0))

    expected_error = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=expected_error):
        frame.swapaxes(2, 5)
def MLE_fast_func(self, p0t: list):
    """Evaluate the MLE expressions for a candidate ``p0`` trajectory.

    Only usable when ``self.beta`` is non-zero.

    :param p0t: candidate ``p0`` values, one per entry of ``self.N_time``
        and in the same order.
    :return: 1-d numpy array holding one value per entry of
        ``self.N_time``; the first entry uses a different expression from
        the remaining ones (see the inline comments).
    :raises KeyError: if ``self.beta`` equals 0.
    """
    if self.beta == 0:
        raise KeyError("MLE_fast_func requires a non-zero beta")

    series_p0 = Series(p0t, self.N_time)
    M = self.Mi(series_p0)
    # After the swap the first index level is the one selected by
    # ``fixed_k[i]`` for each ``i`` in ``self.N_feature``.
    fixed_k = self.K.swaplevel(0, 1)

    part_a = DataFrame(index=self.N_time, columns=self.N_feature,
                       dtype=float)
    part_c = DataFrame(index=self.N_time, columns=self.N_feature,
                       dtype=float)
    upper_part_for_first_argument = DataFrame(index=self.N_time,
                                              columns=self.N_feature)
    for i in self.N_feature:
        upper_part_for_first_argument[i] = fixed_k[i].apply(
            lambda x: (x + self.alpha) * M[i]
        ) - self.fixed_n_without_sigma[i]
        part_a[i] = fixed_k[i].apply(
            lambda x: self.sigma**2 * (x + self.alpha) * M[i]
        ) - self.fixed_n_with_sigma[i]
        part_c[i] = series_p0.apply(lambda x: M[i] - x * self.teta[i])

    # Re-orient every table so that a column lookup by time label yields a
    # Series over the features.  ``.T`` replaces the deprecated
    # ``DataFrame.swapaxes(0, 1)`` and is done once instead of on every
    # loop iteration.
    part_a_by_time = part_a.T
    part_c = part_c.T
    upper_part_for_first_argument = upper_part_for_first_argument.T

    # The first time step has no predecessor and uses its own expression.
    first_t = self.N_time[0]
    results = [
        sum(upper_part_for_first_argument[first_t] /
            (series_p0[first_t] * part_c[first_t]))
    ]

    # Remaining steps depend on the change in p0 relative to the previous
    # time label; walking consecutive pairs avoids a ``list.index`` scan
    # per step.
    for previous_t, t in zip(self.N_time, self.N_time[1:]):
        part_b = series_p0[t] * (series_p0[t] - series_p0[previous_t])
        results.append(
            sum((part_a_by_time[t] - (part_b * part_c[t])) / part_c[t]))

    return np.array(results)
def test_swapaxes(self):
    """Check ``swapaxes`` against ``.T`` and the error for unknown axes."""
    frame = DataFrame(np.random.randn(10, 5))
    flipped = frame.T

    # Either ordering of the two valid axes must produce the transpose.
    for axes in ((0, 1), (1, 0)):
        tm.assert_frame_equal(flipped, frame.swapaxes(*axes))

    # An axis swapped with itself is a no-op.
    tm.assert_frame_equal(frame, frame.swapaxes(0, 0))

    # The pattern accepts both the dense and the sparse frame class names.
    pattern = ("No axis named 2 for object type "
               r"<class 'pandas.core(.sparse)?.frame.(Sparse)?DataFrame'>")
    with pytest.raises(ValueError, match=pattern):
        frame.swapaxes(2, 5)
def calculate_qi_based_on_MLE(self, p0t):
    """Compute one ratio per feature from ``self.K``, ``self.N`` and ``p0t``.

    For every feature ``i`` in ``self.N_feature`` the result is::

        sum_t (K[t, i] + alpha) / sum_t ((N[t, i] + alpha + beta) * p0[t])

    where ``t`` runs over ``self.N_time``.

    :param p0t: p0 values, one per entry of ``self.N_time`` and in the
        same order.
    :return: float ``Series`` indexed by ``self.N_feature``.
    """
    series_p0 = Series(p0t, self.N_time)
    # After the swap, ``fixed_k[i]`` selects all time steps of feature i.
    fixed_k = self.K.swaplevel(0, 1)
    fixed_n = self.N.swaplevel(0, 1)

    # Explicit float dtype: an index-only Series is object-typed on
    # pandas >= 2, which would make the returned ratio object-typed too.
    nominator = Series(index=self.N_feature, dtype=float)
    denominator = Series(index=self.N_feature, dtype=float)

    pre_denominator = fixed_n + self.alpha + self.beta
    # Swap back so that ``pre_denominator[t]`` selects one time step.
    pre_denominator = pre_denominator.swaplevel(0, 1).astype(float)

    almost_denominator = DataFrame(columns=self.N_time,
                                   index=self.N_feature,
                                   dtype=float)
    for t in self.N_time:
        almost_denominator[t] = pre_denominator[t] * series_p0[t]

    # ``.T`` replaces the deprecated ``DataFrame.swapaxes(0, 1)``; the
    # columns become the features so each one can be summed over time.
    pre_denominator = almost_denominator.T

    for i in self.N_feature:
        # Built-in ``sum`` is kept on purpose: unlike ``Series.sum`` it
        # does not skip NaN values.
        nominator[i] = sum(fixed_k[i] + self.alpha)
        denominator[i] = sum(pre_denominator[i])
    return nominator / denominator
def create_df(preprocessed_markov_chain):
    """Build a DataFrame whose rows are the outer keys of a nested mapping.

    ``DataFrame(dict)`` places the outer keys on the columns; the frame is
    transposed so that ``result.loc[outer, inner]`` equals
    ``preprocessed_markov_chain[outer][inner]``.

    :param preprocessed_markov_chain: mapping whose keys are used both as
        the column labels and as the row index of the intermediate frame.
    :type preprocessed_markov_chain: dict
    :return: the transposed frame.
    :rtype: DataFrame
    """
    df = DataFrame(preprocessed_markov_chain,
                   index=list(preprocessed_markov_chain))
    # ``.T`` replaces ``swapaxes(1, 0, copy=False)``, which is deprecated
    # in pandas 2.1; both yield the transpose.
    return df.T
def test_swapaxes(self):
    """Check ``swapaxes`` against ``.T`` and that unknown axes raise."""
    frame = DataFrame(np.random.randn(10, 5))
    expected = frame.T

    # Both orderings of the valid axes give the transpose.
    assert_frame_equal(expected, frame.swapaxes(0, 1))
    assert_frame_equal(expected, frame.swapaxes(1, 0))

    # Swapping an axis with itself returns an equal frame.
    assert_frame_equal(frame, frame.swapaxes(0, 0))

    # Axis numbers outside the frame's two axes are rejected.
    with pytest.raises(ValueError):
        frame.swapaxes(2, 5)
def test_swapaxes(self):
    """Swapping axes 0 and 1, in either order, must equal the transpose."""
    frame = DataFrame(np.random.randn(10, 5))
    transposed = frame.T
    for first, second in ((0, 1), (1, 0)):
        tm.assert_frame_equal(transposed, frame.swapaxes(first, second))
def test_swapaxes_invalid_axis(self):
    """``swapaxes`` must raise ``ValueError`` for an axis number of 2."""
    frame = DataFrame(np.random.randn(10, 5))
    expected_error = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=expected_error):
        frame.swapaxes(2, 5)
def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True):
    """Convert ``x`` into a DataFrame stored on ``self._frame``.

    The input is dispatched on its type: ``PanelData`` (unwrapped to its
    ``dataframe``), ``DataArray`` (2-d or 3-d), ``Series`` with a
    ``MultiIndex``, ``DataFrame``, ``Panel`` or ``ndarray`` (2-d or 3-d).

    :param x: the data to wrap.
    :param var_name: column name used when a DataFrame without a
        MultiIndex is stacked, and base name of the variables created for
        ndarray input.
    :param convert_dummies: when true, the frame is passed through
        ``expand_categoricals`` and cast to float64.
    :param drop_first: forwarded to ``expand_categoricals``.
    :raises ValueError: for a DataArray or ndarray that is not 2-d/3-d, a
        Series without a MultiIndex, a DataFrame whose MultiIndex does not
        have 2 levels, or a second index level that is neither numeric nor
        datetime-like.
    :raises TypeError: for any other input type.
    """
    self._var_name = var_name
    self._convert_dummies = convert_dummies
    self._drop_first = drop_first
    # Unwrap an existing PanelData before remembering the original input,
    # so ``_original`` never holds a PanelData instance.
    if isinstance(x, PanelData):
        x = x.dataframe
    self._original = x

    if isinstance(x, DataArray):
        if x.ndim not in (2, 3):
            raise ValueError('Only 2-d or 3-d DataArrays are supported')
        # From here on the pandas branches below handle the data.
        x = x.to_pandas()

    if isinstance(x, Series) and isinstance(x.index, pd.MultiIndex):
        x = DataFrame(x)
    elif isinstance(x, Series):
        raise ValueError(
            'Series can only be used with a 2-level MultiIndex')

    if isinstance(x, (Panel, DataFrame)):
        if isinstance(x, DataFrame):
            if isinstance(x.index, pd.MultiIndex):
                if len(x.index.levels) != 2:
                    raise ValueError('DataFrame input must have a '
                                     'MultiIndex with 2 levels')
                self._frame = x.copy()
            else:
                # A DataFrame without a MultiIndex is treated as a single
                # variable: transpose, then stack into one column named
                # ``var_name`` (``dropna=False`` keeps missing entries).
                self._frame = DataFrame(
                    {var_name: x.T.stack(dropna=False)})
        else:
            # Panel input: swap axes 1 and 2 before flattening.
            # NOTE(review): presumably this puts entities ahead of time in
            # the resulting MultiIndex -- confirm against Panel.to_frame.
            self._frame = x.swapaxes(1, 2).to_frame(filter_observations=False)
    elif isinstance(x, ndarray):
        if not 2 <= x.ndim <= 3:
            raise ValueError('2 or 3-d array required for numpy input')
        if x.ndim == 2:
            # Promote to 3-d by adding a leading axis of length 1.
            x = x[None, :, :]

        k, t, n = x.shape
        # One label per slice along the first axis; a single slice keeps
        # the bare ``var_name``.
        variables = [var_name] if k == 1 else [
            var_name + '.{0}'.format(i) for i in range(k)
        ]
        entities = ['entity.{0}'.format(i) for i in range(n)]
        time = list(range(t))
        x = x.astype(np.float64)
        panel = Panel(x, items=variables, major_axis=time, minor_axis=entities)
        self._frame = panel.swapaxes(1, 2).to_frame(filter_observations=False)
    else:
        raise TypeError('Only ndarrays, DataFrames, Panels or DataArrays '
                        'supported.')
    if convert_dummies:
        self._frame = expand_categoricals(self._frame, drop_first)
        self._frame = self._frame.astype(np.float64)

    # The second index level is validated as the time dimension.
    time_index = Series(self._frame.index.levels[1])
    if not (is_numeric_dtype(time_index.dtype) or
            is_datetime64_any_dtype(time_index.dtype)):
        raise ValueError('The index on the time dimension must be either '
                         'numeric or date-like')
    # NOTE(review): ``self.panel`` is not defined in this block; presumably
    # a property defined elsewhere on the class -- confirm.
    self._k, self._t, self._n = self.panel.shape
    self._frame.index.levels[0].name = 'entity'
    self._frame.index.levels[1].name = 'time'
def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy=True):
    """Convert ``x`` into a DataFrame stored on ``self._frame``.

    The input is dispatched on its type: ``PanelData`` (unwrapped to its
    ``dataframe``), xarray ``DataArray`` (2-d or 3-d, only if xarray can
    be imported), ``Series`` with a ``MultiIndex``, ``DataFrame``,
    ``Panel`` or ``np.ndarray`` (2-d or 3-d).

    :param x: the data to wrap.
    :param var_name: column name used when a DataFrame without a
        MultiIndex is stacked, and base name of the variables created for
        ndarray input.
    :param convert_dummies: when true, the frame is passed through
        ``expand_categoricals`` and cast to float64.
    :param drop_first: forwarded to ``expand_categoricals``.
    :param copy: when true, a MultiIndex DataFrame input is copied before
        being stored; the other input branches do not consult this flag.
    :raises ValueError: for a DataArray or ndarray that is not 2-d/3-d, a
        Series without a MultiIndex, a DataFrame whose MultiIndex does not
        have 2 levels, or a second index level that is neither numeric nor
        datetime-like.
    :raises TypeError: for any other input type.
    """
    self._var_name = var_name
    self._convert_dummies = convert_dummies
    self._drop_first = drop_first
    self._panel = None
    self._shape = None
    # Default level names; replaced below by the input's own level names
    # when those are set.
    index_names = ['entity', 'time']
    # Unwrap an existing PanelData before remembering the original input.
    if isinstance(x, PanelData):
        x = x.dataframe
    self._original = x

    if not isinstance(x, (Series, DataFrame, Panel, np.ndarray)):
        # xarray is optional: when it cannot be imported the input falls
        # through to the TypeError at the end of the type dispatch.
        try:
            from xarray import DataArray
            if isinstance(x, DataArray):
                if x.ndim not in (2, 3):
                    raise ValueError(
                        'Only 2-d or 3-d DataArrays are supported')
                if x.ndim == 2:
                    x = x.to_pandas()
                else:
                    # 3-d: read the coordinate labels of each dimension
                    # and hand them, with the raw values, to
                    # ``panel_to_frame``.
                    items = x.coords[x.dims[0]].values.tolist()
                    major = x.coords[x.dims[1]].values.tolist()
                    minor = x.coords[x.dims[2]].values.tolist()
                    values = x.values
                    x = panel_to_frame(values, items, major, minor, True)
        except ImportError:
            pass

    if isinstance(x, Series) and isinstance(x.index, MultiIndex):
        x = DataFrame(x)
    elif isinstance(x, Series):
        raise ValueError(
            'Series can only be used with a 2-level MultiIndex')

    if isinstance(x, (Panel, DataFrame)):
        if isinstance(x, DataFrame):
            if isinstance(x.index, MultiIndex):
                if len(x.index.levels) != 2:
                    raise ValueError('DataFrame input must have a '
                                     'MultiIndex with 2 levels')
                # Keep the caller's level names only when the original
                # input was itself pandas-like; unnamed levels keep the
                # defaults.
                if isinstance(self._original, (DataFrame, PanelData, Series)):
                    for i in range(2):
                        index_names[
                            i] = x.index.levels[i].name or index_names[i]
                self._frame = x
                if copy:
                    self._frame = self._frame.copy()
            else:
                # A DataFrame without a MultiIndex is treated as a single
                # variable: transpose, then stack into one column named
                # ``var_name`` (``dropna=False`` keeps missing entries).
                self._frame = DataFrame(
                    {var_name: x.T.stack(dropna=False)})
        else:
            # Panel input: swap axes 1 and 2 before flattening.
            self._frame = x.swapaxes(1, 2).to_frame(filter_observations=False)
    elif isinstance(x, np.ndarray):
        if x.ndim not in (2, 3):
            raise ValueError('2 or 3-d array required for numpy input')
        if x.ndim == 2:
            # Promote to 3-d by adding a leading axis of length 1.
            x = x[None, :, :]

        k, t, n = x.shape
        # Zero-padded numeric suffixes; the pad width is derived from
        # log10 of the count so the labels have equal length.
        var_str = var_name + '.{0:0>' + str(int(np.log10(k) + .01)) + '}'
        variables = [var_name] if k == 1 else [
            var_str.format(i) for i in range(k)
        ]
        entity_str = 'entity.{0:0>' + str(int(np.log10(n) + .01)) + '}'
        entities = [entity_str.format(i) for i in range(n)]
        time = list(range(t))
        x = x.astype(np.float64, copy=False)
        panel = _Panel.from_array(x, items=variables, major_axis=time, minor_axis=entities)
        self._fake_panel = panel
        self._frame = panel.to_frame()
    else:
        raise TypeError('Only ndarrays, DataFrames, Panels or DataArrays '
                        'are supported')
    if convert_dummies:
        self._frame = expand_categoricals(self._frame, drop_first)
        self._frame = self._frame.astype(np.float64, copy=False)

    # The second index level is validated as the time dimension.
    time_index = Series(self._frame.index.levels[1])
    if not (is_numeric_dtype(time_index.dtype) or
            is_datetime64_any_dtype(time_index.dtype)):
        raise ValueError('The index on the time dimension must be either '
                         'numeric or date-like')
    # Previously: self._k, self._t, self._n = self.panel.shape
    # NOTE(review): ``self.shape`` is not defined in this block; presumably
    # a property defined elsewhere on the class -- confirm.
    self._k, self._t, self._n = self.shape
    levels = self._frame.index.levels
    for i in range(2):
        levels[i].name = index_names[i]
d.swaplevel(0,1) #%% d.swaplevel('row1','row2') #%% 交换列索引顺序 d.swaplevel(0,1,axis=1) #%% 将行索引的最低一级转化为列索引 d.unstack() #%% 将列索引的最低一级转化为行索引 d.stack() # 疑问: # d.stack().stack() 变成一个系列是可以理解的,但为何d.unstack().unstack() # 也变成一个系列了 #%% 互换行和列 d.swapaxes(0,1) # 将索引转化为列 d1=d.reset_index() d1 #%% 将列转化为索引 d1.set_index('row1') #%% 转化后保留列 d1.set_index('row1',drop=False) #%% 将两个列转化为索引 d1.set_index(['row1','row2']) #%% 如果列名和索引名称重复,会失败 d1.set_index('row1',drop=False).reset_index() #%% 重命名即可解决 d2=d1.set_index('row1',drop=False)
d3.pivot_table('value1',rows='bar',cols='foo',aggfunc=sum) #%% # 由于有重复不能做pivot操作 d3.pivot('foo','bar','value1') # error #%% 将行数据转化为列名 d4=d1.set_index([d1.index,'foo']).unstack() d4 #%% 将列名转化为行数据 d4=d4.swaplevel(0,1,axis=1).stack() d4.set_index(d4.index.droplevel(0)).reset_index() #%% 交换数据框的横 # DataFrame.swapaxes(axis1, axis2, copy=True) # axis = 0 表示行,axis = 0 表示列,真不明白这个函数为啥需要这两个参数 # d1.swapaxes(0,1) 和 d1.swapaxes(1,0) 都管用 d1.swapaxes(0,1) #%% ''' 我们通常指的行转列是把一整列的数据(也就是一整列中的所有行)转化到一级列名上,也可以把 一级列名转化到一个数据列上去(列转行),但似乎却无法直接将一级行索引转化成一行数据,这 说明pandas的行列操作也不是完全平衡的。当然在使用swapaxes后就可以了,当然这种操作奇怪得 很,估计是很难用到的。以下演示了这样的转化 ''' #%% 将一整列的数据转化为一整行 d6=d1.set_index([d1.index,'foo']).unstack() d6.swapaxes(0,1).reset_index().set_index('level_0').swapaxes(0,1) #%% ''' 这里涉及到一个数据分析处理的惯性思维的问题:我们通常要求一列的数据是同一种类型,而很少 要求一行的数据是同一种类型。所以在看到这种奇怪的数据转换,比较难以理解且说明白其代表的