def process_pandas(data, endog_idx=0, exog_idx=None, index_idx=None):
    """Split a DataFrame into endogenous/exogenous parts and build a ``Dataset``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding every variable.
    endog_idx : int or other indexer, default 0
        An integer is the position (within ``data.columns``) of the single
        endogenous column. Any other value is handed unchanged to
        ``data.loc[:, endog_idx]``, i.e. it is a label-based selection.
    exog_idx : int, list-like of int or None, default None
        Position(s) within ``data.columns`` of the exogenous columns.
        ``None`` selects every column that is not endogenous.
    index_idx : int or None, default None
        Position of the column whose values become the index of ``endog``,
        ``exog`` and the returned ``data``. ``None`` leaves the indexes alone.

    Returns
    -------
    Dataset
        Constructed with ``data``, ``names`` (the original column names),
        ``endog``, ``exog``, ``endog_name`` and ``exog_name``.
    """
    # Local import keeps the block self-contained; Integral also covers
    # NumPy integer scalars, which a plain ``int`` check rejects.
    import numbers

    names = data.columns

    if isinstance(endog_idx, numbers.Integral):
        endog_name = names[endog_idx]
        endog = data[endog_name].copy()
        endog_columns = [endog_name]
    else:
        endog = data.loc[:, endog_idx].copy()
        endog_name = list(endog.columns)
        endog_columns = endog_name

    if exog_idx is None:
        exog = data.drop(endog_columns, axis=1)
    elif isinstance(exog_idx, numbers.Integral):
        # Select with a one-element list so exog stays a DataFrame; a bare
        # label would give a Series, which has no ``.columns`` below.
        exog = data[[names[exog_idx]]].copy()
    else:
        exog = data[names[exog_idx]].copy()

    if index_idx is not None:
        # NOTE: will have to be improved for dates
        index = Index(data.iloc[:, index_idx])
        endog.index = index
        exog.index = index.copy()
        data = data.set_index(names[index_idx])

    exog_name = list(exog.columns)
    dataset = Dataset(data=data, names=list(names), endog=endog,
                      exog=exog, endog_name=endog_name, exog_name=exog_name)
    return dataset
def process_pandas(data, endog_idx=0, exog_idx=None, index_idx=None):
    """Split a DataFrame into endogenous/exogenous parts and build a ``Dataset``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding every variable.
    endog_idx : int or other indexer, default 0
        An integer is the position (within ``data.columns``) of the single
        endogenous column. Any other value is handed unchanged to
        ``data.loc[:, endog_idx]``, i.e. it is a label-based selection.
    exog_idx : int, list-like of int or None, default None
        Position(s) within ``data.columns`` of the exogenous columns.
        ``None`` selects every column that is not endogenous.
    index_idx : int or None, default None
        Position of the column whose values become the index of ``endog``,
        ``exog`` and the returned ``data``. ``None`` leaves the indexes alone.

    Returns
    -------
    Dataset
        Constructed with ``data``, ``names`` (the original column names),
        ``endog``, ``exog``, ``endog_name`` and ``exog_name``.
    """
    # ``numbers.Integral`` replaces the ``(int, long)`` check: it needs no
    # Python 2 ``long`` name and also accepts NumPy integer scalars.
    import numbers

    names = data.columns

    if isinstance(endog_idx, numbers.Integral):
        endog_name = names[endog_idx]
        endog = data[endog_name].copy()
        endog_columns = [endog_name]
    else:
        endog = data.loc[:, endog_idx].copy()
        endog_name = list(endog.columns)
        endog_columns = endog_name

    if exog_idx is None:
        exog = data.drop(endog_columns, axis=1)
    elif isinstance(exog_idx, numbers.Integral):
        # Select with a one-element list so exog stays a DataFrame; a bare
        # label would give a Series, which has no ``.columns`` below.
        exog = data[[names[exog_idx]]].copy()
    else:
        exog = data[names[exog_idx]].copy()

    if index_idx is not None:
        # NOTE: will have to be improved for dates
        index = Index(data.iloc[:, index_idx])
        endog.index = index
        exog.index = index.copy()
        data = data.set_index(names[index_idx])

    exog_name = list(exog.columns)
    dataset = Dataset(data=data, names=list(names), endog=endog,
                      exog=exog, endog_name=endog_name, exog_name=exog_name)
    return dataset
def sort_columns_based_first_character(columns: pd.Index, list_order: List[str]) -> np.ndarray:
    """
    Sort column names by their leading characters, following list_order.

    Every column is ranked by the position in list_order of the longest
    prefix it starts with. Columns matching no prefix are ranked after all
    matched ones. Columns sharing a rank are sorted by name.

    Params::
        columns: column names to sort (string labels)
        list_order: prefixes in the desired order; each one is matched
            literally, so characters such as "." or "(" carry no regex meaning
    Returns::
        array of the column names in sorted order
    """
    df_col = pd.DataFrame(columns.values, columns=["colname"])
    df_col["order"] = np.nan  # NaN marks "not ranked yet"; ranks are filled in below

    # Try the longest prefixes first so the most specific match wins.
    # sorted() is stable, so for equal lengths the earlier entry is tried first.
    by_length = sorted(enumerate(list_order), key=lambda item: len(item[1]), reverse=True)
    for position, prefix in by_length:
        unranked_match = (
            df_col["colname"].str.startswith(prefix, na=False)
            & df_col["order"].isna()
        )
        df_col.loc[unranked_match, "order"] = position

    # Unmatched columns share one rank just past the largest assigned rank
    # (0 when nothing matched at all).
    highest = df_col["order"].fillna(-1).max()
    df_col["order"] = df_col["order"].fillna(highest + 1)
    return df_col.sort_values(by=["order", "colname"])["colname"].to_numpy()
def nanmap(columns: pd.Index, dict_map: dict) -> np.ndarray:
    """
    Rename the labels of columns that appear in dict_map and keep the rest.

    columns.map(dict_map) on its own yields NaN for every label that is not
    a key of dict_map; this helper leaves such labels unchanged instead.

    Params::
        columns: labels to rename; the passed Index is never modified
        dict_map: mapping from old label to new label. A label whose mapped
            value is missing (NaN/None) is treated as unmapped.
    Returns::
        new array with the renamed labels
    """
    mapped = columns.map(dict_map)
    has_mapping = mapped.notna()
    # Explicit copy: Index.copy() is shallow, so writing into its values
    # would alter (or fail on) the caller's Index.
    result = columns.to_numpy(copy=True)
    result[has_mapping] = mapped.to_numpy()[has_mapping]
    return result
def from_pandas_index(cls, index: pd.Index, dim: Hashable):
    """Wrap a pandas index for dimension ``dim``.

    Parameters
    ----------
    index : pd.Index
        Index to wrap. When it has no name, a copy named ``dim`` is used
        instead, so the object passed in is left untouched.
    dim : Hashable
        Dimension passed to ``cls`` and to the ``IndexVariable``.

    Returns
    -------
    tuple
        ``(cls(index, dim), {name: index_variable})`` where ``name`` is the
        index's own name, or ``dim`` when the index was unnamed.
    """
    from .variable import IndexVariable

    var_name = index.name
    if var_name is None:
        # Name a copy rather than the caller's index.
        index = index.copy()
        index.name = dim
        var_name = dim

    coord = IndexVariable(dim, PandasIndexingAdapter(index), fastpath=True)
    return cls(index, dim), {var_name: coord}