def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("get predicted user id"): predicted_user = pd.read_csv( '../../data/interim/20190901_user_ids_share.csv') train = pd.merge( train, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') test = pd.merge( test, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') with timer("count encoding"): categorical_cols = self.categorical_features() for col in categorical_cols: train_result, test_result = count_encoding(col, train, test) self.train_feature[col] = train_result self.test_feature[col] = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("create features"): train_result, test_result = matrix_factorize(categorical_cols, train, test) self.train_feature = train_result self.test_feature = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("label encoding"): for col1, col2 in combinations(categorical_cols, 2): new_fe_col_name = f'{col1}_{col2}' train[new_fe_col_name] = train[col1].astype("str") + "_" + train[col2].astype("str") test[new_fe_col_name] = test[col1].astype("str") + "_" + test[col2].astype("str") train_result, test_result = label_encoding(new_fe_col_name, train, test) self.train_feature[new_fe_col_name] = train_result self.test_feature[new_fe_col_name] = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("label encoding"): categorical_cols = self.categorical_features() for col in categorical_cols: train_result, test_result = label_encoding(col, train, test) self.train_feature[col] = train_result self.test_feature[col] = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("get predicted user id"): predicted_user = pd.read_csv( '../../data/interim/20190901_user_ids_share.csv') train = pd.merge( train, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') test = pd.merge( test, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') total = train.append(test).reset_index(drop=True) with timer("get original cols"): org_cols = total.columns with timer("aggregate categorical features"): total = calc_agg_category_func(total, groupby_dict) new_cols = [c for c in total.columns if c not in org_cols] total = total[new_cols] logger.info(f"n_features: {len(new_cols)}") train_result = total.iloc[:len(train)].reset_index(drop=True) test_result = total.iloc[len(train):].reset_index(drop=True) self.train_feature = train_result self.test_feature = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("get predicted user id"): predicted_user = pd.read_csv('../../data/interim/20190901_user_ids_share.csv') train = pd.merge(train, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') test = pd.merge(test, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') total = train.append(test).reset_index(drop=True) total['TransactionAmt_decimal'] = ((total['TransactionAmt'] - total['TransactionAmt'].astype(int)) * 1000).astype(int) with timer("make V features"): ### V75 ~~~ 94 cols_V75_94 = [f'V{no}' for no in range(75, 95, 1)] cols_other = [f'V{no}' for no in [75, 88, 89, 90, 91, 94, 100, 104, 105, 106]] cols_V75_94 = list(set(cols_V75_94) - set(cols_other)) total['V75_94_mean'] = total[cols_V75_94].mean(axis=1) ### V95 ~~~ 137 cols_V95_137 = [f'V{no}' for no in range(95, 138, 1)] cols_V95_137 = list(set(cols_V95_137) - set([f'V{no}' for no in range(130, 138, 1)])) cols_other = [f'V{no}' for no in [96, 98, 99, 100, 104, 105, 106 , 120, 121, 122, 126, 127, 128]] cols_other_2 = [f'V{no}' for no in [117, 118, 119]] cols_V95_137 = sorted(list(set(cols_V95_137) - set(cols_other) -set(cols_other_2))) total['V95_137_mean'] = total[cols_V95_137].mean(axis=1) ### V167 ~~~ 216 cols_V167_216 = [f'V{no}' for no in range(167, 217, 1)] cols_other = [f'V{no}' for no in range(186, 202, 1)] no_use_cols = [f'V{no}' for no in [169, 172, 173, 174, 175] + list(range(202, 217, 1))] cols_V167_216 = sorted(list(set(cols_V167_216) - set(cols_other) -set(no_use_cols))) total['V167_216_mean'] = total[cols_V167_216].mean(axis=1) ### V242 ~~~ 263 cols_V242_263 = [f'V{no}' for no in list(range(242, 250, 1)) + list(range(252, 255, 1)) + list(range(257, 263, 1))] total['V242_263_mean'] = total[cols_V242_263].mean(axis=1) org_cols = total.columns with timer("sin/cos transformation"): total["D9_sin"] = np.sin(2 * np.pi * total["D9"] / 24).round(4) total["D9_cos"] = np.cos(2 * np.pi * total["D9"] / 24).round(4) total["D9_LocalTime_sin"] = np.sin(2 * np.pi * total["D9_LocalTime"] / 24).round(4) total["D9_LocalTime_cos"] = np.cos(2 * np.pi * total["D9_LocalTime"] / 24).round(4) with timer("group by features"): groupby = GroupbyTransformer(param_dict=groupby_dict) total = groupby.transform(total) diff = DiffGroupbyTransformer(param_dict=diff_dict) total = diff.transform(total) ratio = RatioGroupbyTransformer(param_dict=diff_dict) total = ratio.transform(total) new_cols = [c for c in total.columns if c not in org_cols] total = total[new_cols] logger.info(f"n_features: {len(new_cols)}") train_result = total.iloc[:len(train)].reset_index(drop=True) test_result = total.iloc[len(train):].reset_index(drop=True) self.train_feature = train_result self.test_feature = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("get predicted user id"): predicted_user = pd.read_csv( '../../data/interim/20190901_user_ids_share.csv') train = pd.merge( train, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') test = pd.merge( test, predicted_user[['TransactionID', 'predicted_user_id']], how='left', on='TransactionID') total = train.append(test).reset_index(drop=True) with timer("make V features"): ### V75 ~~~ 94 cols_V75_94 = [f'V{no}' for no in range(75, 95, 1)] cols_other = [ f'V{no}' for no in [75, 88, 89, 90, 91, 94, 100, 104, 105, 106] ] cols_V75_94 = list(set(cols_V75_94) - set(cols_other)) total['V75_94_mean'] = total[cols_V75_94].mean(axis=1) ### V95 ~~~ 137 cols_V95_137 = [f'V{no}' for no in range(95, 138, 1)] cols_V95_137 = list( set(cols_V95_137) - set([f'V{no}' for no in range(130, 138, 1)])) cols_other = [ f'V{no}' for no in [96, 98, 99, 100, 104, 105, 106, 120, 121, 122, 126, 127, 128] ] cols_other_2 = [f'V{no}' for no in [117, 118, 119]] cols_V95_137 = sorted( list(set(cols_V95_137) - set(cols_other) - set(cols_other_2))) total['V95_137_mean'] = total[cols_V95_137].mean(axis=1) ### V167 ~~~ 216 cols_V167_216 = [f'V{no}' for no in range(167, 217, 1)] cols_other = [f'V{no}' for no in range(186, 202, 1)] no_use_cols = [ f'V{no}' for no in [169, 172, 173, 174, 175] + list(range(202, 217, 1)) ] cols_V167_216 = sorted( list(set(cols_V167_216) - set(cols_other) - set(no_use_cols))) total['V167_216_mean'] = total[cols_V167_216].mean(axis=1) ### V242 ~~~ 263 cols_V242_263 = [ f'V{no}' for no in list(range(242, 250, 1)) + list(range(252, 255, 1)) + list(range(257, 263, 1)) ] total['V242_263_mean'] = total[cols_V242_263].mean(axis=1) with timer("get original cols"): org_cols = total.columns with timer("Set TransactionDT"): total["TransactionDT"] = total["TransactionDT"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")) total.set_index("TransactionDT", inplace=True) with timer("get rolling features"): total = calc_rolling_func(total, groupby_dict) new_cols = [c for c in total.columns if c not in org_cols] total = total[new_cols] logger.info(f"n_features: {len(new_cols)}") train_result = total.iloc[:len(train)].reset_index(drop=True) test_result = total.iloc[len(train):].reset_index(drop=True) self.train_feature = train_result self.test_feature = test_result with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
def create_features(self):
    feature_name_list = []
    with timer("load data"):
        train = read_preprocessing_data(DATA_DIR, "train", write_mode=False)
        test = read_preprocessing_data(DATA_DIR, "test", write_mode=False)

    with timer("get predicted user id"):
        predicted_user = pd.read_csv(
            '../../data/interim/20190901_user_ids_share.csv')
        train = pd.merge(
            train, predicted_user[['TransactionID', 'predicted_user_id']],
            how='left', on='TransactionID')
        test = pd.merge(
            test, predicted_user[['TransactionID', 'predicted_user_id']],
            how='left', on='TransactionID')

    with timer("Set TransactionDT"):
        train["TransactionDT"] = train["TransactionDT"].apply(
            lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
        test["TransactionDT"] = test["TransactionDT"].apply(
            lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
        total = train.append(test).reset_index(drop=True)

    with timer("Get lag-time to future records"):
        # hours to the same user's next/previous transaction; NaN at a user's
        # first/last record, so the result must stay float (casting a series
        # containing NaN to int64 raises)
        total['TransactionDT_lag1_future'] = total.groupby(
            'predicted_user_id')['TransactionDT'].shift(-1)
        total["Diff_Time_To_lag1_future"] = (
            total["TransactionDT_lag1_future"] - total["TransactionDT"]
        ).apply(lambda x: x.days * 24 + x.seconds / 60 / 60 if x == x else x)
        total['TransactionDT_lag1_past'] = total.groupby(
            'predicted_user_id')['TransactionDT'].shift(1)
        total["Diff_Time_To_lag1_past"] = (
            total["TransactionDT_lag1_past"] - total["TransactionDT"]
        ).apply(lambda x: x.days * 24 + x.seconds / 60 / 60 if x == x else x)
        feature_name_list.extend(
            ["Diff_Time_To_lag1_future", "Diff_Time_To_lag1_past"])

    with timer("Get lag-value"):
        # future value
        total['TransactionAmt_lag1_future'] = total.groupby(
            'predicted_user_id')['TransactionAmt'].shift(-1)
        feature_name_list.extend(["TransactionAmt_lag1_future"])
        # past value
        total['TransactionAmt_lag1_past'] = total.groupby(
            'predicted_user_id')['TransactionAmt'].shift(1)
        feature_name_list.extend(["TransactionAmt_lag1_past"])
        # current value - future value
        total['diff_TransactionAmt_lag1_future'] = (
            total['TransactionAmt'] - total['TransactionAmt_lag1_future'])
        feature_name_list.extend(["diff_TransactionAmt_lag1_future"])
        # current value - past value
        total['diff_TransactionAmt_lag1_past'] = (
            total['TransactionAmt'] - total['TransactionAmt_lag1_past'])
        feature_name_list.extend(["diff_TransactionAmt_lag1_past"])
        # current value / future value
        total['div_TransactionAmt_lag1_future'] = (
            total['TransactionAmt'] / total['TransactionAmt_lag1_future'])
        feature_name_list.extend(["div_TransactionAmt_lag1_future"])
        # current value / past value
        total['div_TransactionAmt_lag1_past'] = (
            total['TransactionAmt'] / total['TransactionAmt_lag1_past'])
        feature_name_list.extend(["div_TransactionAmt_lag1_past"])
        # slope: amount change per hour between neighbouring transactions
        total['slope_TransactionAmt_lag1_future'] = (
            total['TransactionAmt'] - total['TransactionAmt_lag1_future']
        ) / total['Diff_Time_To_lag1_future']
        total['slope_TransactionAmt_lag1_past'] = (
            total['TransactionAmt'] - total['TransactionAmt_lag1_past']
        ) / total['Diff_Time_To_lag1_past']
        feature_name_list.extend([
            "slope_TransactionAmt_lag1_future",
            "slope_TransactionAmt_lag1_past",
        ])
        # per-user mean amount vs. the lag values
        grp_Amt = total.groupby(
            'predicted_user_id')['TransactionAmt'].mean().reset_index()
        grp_Amt.columns = ['predicted_user_id', 'groupby_TransactionAmt']
        total = total.merge(grp_Amt, how='left', on='predicted_user_id')
        total['diff_grp_TransactionAmt_lag1_future'] = (
            total['groupby_TransactionAmt'] - total['TransactionAmt_lag1_future'])
        total['diff_grp_TransactionAmt_lag1_past'] = (
            total['groupby_TransactionAmt'] - total['TransactionAmt_lag1_past'])
        feature_name_list.extend([
            "diff_grp_TransactionAmt_lag1_future",
            "diff_grp_TransactionAmt_lag1_past",
        ])

    with timer("end"):
        train_result = total.iloc[:len(train)].reset_index(drop=True)
        test_result = total.iloc[len(train):].reset_index(drop=True)
        for fe in feature_name_list:
            self.train_feature[fe] = train_result[fe]
            self.test_feature[fe] = test_result[fe]
        self.train_feature.reset_index(drop=True, inplace=True)
        self.test_feature.reset_index(drop=True, inplace=True)
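
# A tiny illustration (hypothetical data) of the shift-based lag pattern used
# above: within each user, shift(1) looks at the previous transaction and
# shift(-1) at the next one; NaN marks a user's first/last record.
import pandas as pd

df = pd.DataFrame({
    'predicted_user_id': [1, 1, 1, 2, 2],
    'TransactionAmt': [10.0, 25.0, 5.0, 80.0, 90.0],
})
df['amt_past'] = df.groupby('predicted_user_id')['TransactionAmt'].shift(1)
df['amt_future'] = df.groupby('predicted_user_id')['TransactionAmt'].shift(-1)
# user 1, row 0: amt_past = NaN, amt_future = 25.0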
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("concat os and browser info"): os_info = load_os_release_date() os_info.sort_values(["os_type", "os_release_date"], inplace=True) os_info["os_release_date_next_ver"] = os_info.groupby( "os_type")["os_release_date"].shift(-1).fillna("2019-09-01") train = pd.merge(train, os_info, how="left", left_on="id_30", right_on="os_name").drop(columns="id_30") test = pd.merge(test, os_info, how="left", left_on="id_30", right_on="os_name").drop(columns="id_30") browser_info = load_browser_release_date() browser_info.sort_values(["browser_type", "browser_release_date"], inplace=True) browser_info[ "browser_release_date_next_ver"] = browser_info.groupby( "browser_type")["browser_release_date"].shift(-1).fillna( "2019-09-01") train = pd.merge(train, browser_info, how="left", left_on="id_31", right_on="browser_name").drop(columns="id_31") test = pd.merge(test, browser_info, how="left", left_on="id_31", right_on="browser_name").drop(columns="id_31") total = train.append(test).reset_index(drop=True) feature_name_list = [] with timer("convert object-type to datetime-type"): total["TransactionDT"] = total["TransactionDT"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")) total["os_release_date"] = total["os_release_date"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d") if x == x else x) total["browser_release_date"] = total[ "browser_release_date"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d") if x == x else x) total = get_new_browser(total) with timer("make features: elapsed times from release date"): total["elapsed_days_from_os_release"] = ( total["TransactionDT"] - total["os_release_date"]).apply(lambda x: x.days) total["elapsed_days_from_browser_release"] = ( total["TransactionDT"] - total["browser_release_date"]).apply(lambda x: x.days) feature_name_list.extend([ "elapsed_days_from_os_release", "elapsed_days_from_browser_release" ]) with timer("make features: elapsed times from new-version"): total = pd.merge( total, browser_info[["browser_name", "browser_release_date"]].rename( columns={ "browser_name": "new_browser_name", "browser_release_date": "new_browser_release_date" }), how="left", on="new_browser_name") total["latest_browser"] = np.nan total.loc[total["browser_name"].notnull(), "latest_browser"] = ( total.loc[total["browser_name"].notnull(), "browser_name"] == total.loc[total["browser_name"].notnull(), "new_browser_name"]).astype(int) total["new_browser_release_date"] = total[ "new_browser_release_date"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d") if x == x else x) total["elapsed_days_from_new_browser_release"] = ( total["TransactionDT"] - total["new_browser_release_date"]).apply(lambda x: x.days) total.loc[total["latest_browser"] == 1, "elapsed_days_from_new_browser_release"] = 0 total["elapsed_days_from_new_browser_release_v2"] = total[ "elapsed_days_from_new_browser_release"] + total[ "elapsed_days_from_browser_release"] total.loc[total["latest_browser"] == 1, "elapsed_days_from_new_browser_release_v2"] = 0 feature_name_list.extend([ "elapsed_days_from_new_browser_release", "latest_browser", "elapsed_days_from_new_browser_release_v2" ]) with timer("end"): train_result = total.iloc[:len(train)].reset_index(drop=True) test_result = total.iloc[len(train):].reset_index(drop=True) for fe in feature_name_list: self.train_feature[fe] = 
train_result[fe] self.test_feature[fe] = test_result[fe] self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
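
# load_os_release_date() / load_browser_release_date() are repo helpers; a
# hedged sketch of the table shape the merges above imply (the rows here are
# illustrative only, not the repo's actual data): *_name matches the raw
# id_30 / id_31 strings, *_type groups versions of one product so the
# shift(-1) above can find the next version's release date.
import pandas as pd


def load_browser_release_date():
    return pd.DataFrame({
        'browser_name': ['chrome 70.0', 'chrome 71.0'],        # hypothetical rows
        'browser_type': ['chrome', 'chrome'],
        'browser_release_date': ['2018-10-16', '2018-12-04'],
    })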
def create_features(self): with timer("load data"): train = read_preprocessing_data(DATA_DIR, "train", write_mode=False) test = read_preprocessing_data(DATA_DIR, "test", write_mode=False) with timer("get numeric features"): numeric_cols = get_numeric_cols() self.train_feature[numeric_cols] = train[numeric_cols] self.test_feature[numeric_cols] = test[numeric_cols] with timer( "make features: V features related to TransactionAmt + TransactionAmt" ): v_cols_related_to_amt = get_V_cols_related_to_Amt() for col in v_cols_related_to_amt: new_fe_col_name = col + "_add_Amt" self.train_feature[ new_fe_col_name] = train[col] + train["TransactionAmt"] self.test_feature[ new_fe_col_name] = test[col] + test["TransactionAmt"] with timer("make features: TransactionAmt * CXX"): c_cols = ['C1', 'C13', 'C14'] for col in c_cols: new_fe_col_name = col + "_mul_Amt" self.train_feature[ new_fe_col_name] = train[col] * train['TransactionAmt'] self.test_feature[ new_fe_col_name] = test[col] * test['TransactionAmt'] with timer("numeric feature processings"): self.train_feature["day_of_week"] = train["TransactionDT"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S" ).weekday()) self.test_feature["day_of_week"] = test["TransactionDT"].apply( lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S" ).weekday()) self.train_feature['TransactionAmt_decimal'] = ( (train['TransactionAmt'] - train['TransactionAmt'].astype(int)) * 1000).astype(int) self.test_feature['TransactionAmt_decimal'] = ( (test['TransactionAmt'] - test['TransactionAmt'].astype(int)) * 1000).astype(int) with timer("agg V features"): # V1 ~ V11 vcol_names = [f'V{i}' for i in range(1, 12)] self.train_feature['sum_V1_V11'] = train[vcol_names].sum(axis=1) self.test_feature['sum_V1_V11'] = test[vcol_names].sum(axis=1) self.train_feature['null_sum_V1_V11'] = train[vcol_names].isnull( ).sum(axis=1) self.test_feature['null_sum_V1_V11'] = test[vcol_names].isnull( ).sum(axis=1) # V12 ~ V34 vcol_names = [f'V{i}' for i in range(12, 35)] self.train_feature['sum_V12_V34'] = train[vcol_names].sum(axis=1) self.test_feature['sum_V12_V34'] = test[vcol_names].sum(axis=1) self.train_feature['null_sum_V12_V34'] = train[vcol_names].isnull( ).sum(axis=1) self.test_feature['null_sum_V12_V34'] = test[vcol_names].isnull( ).sum(axis=1) # V35 ~ V52 vcol_names = [f'V{i}' for i in range(35, 53)] self.train_feature['sum_V35_V52'] = train[vcol_names].sum(axis=1) self.test_feature['sum_V35_V52'] = test[vcol_names].sum(axis=1) self.train_feature['null_sum_V35_V52'] = train[vcol_names].isnull( ).sum(axis=1) self.test_feature['null_sum_V35_V52'] = test[vcol_names].isnull( ).sum(axis=1) # V53 ~ V74 vcol_names = [f'V{i}' for i in range(53, 75)] self.train_feature['sum_V53_V74'] = train[vcol_names].sum(axis=1) self.test_feature['sum_V53_V74'] = test[vcol_names].sum(axis=1) self.train_feature['null_sum_V53_V74'] = train[vcol_names].isnull( ).sum(axis=1) self.test_feature['null_sum_V53_V74'] = test[vcol_names].isnull( ).sum(axis=1) # V75 ~ V94 vcol_names = [f'V{i}' for i in range(75, 95)] self.train_feature['sum_V75_V94'] = train[vcol_names].sum(axis=1) self.test_feature['sum_V75_V94'] = test[vcol_names].sum(axis=1) self.train_feature['null_sum_V75_V94'] = train[vcol_names].isnull( ).sum(axis=1) self.test_feature['null_sum_V75_V94'] = test[vcol_names].isnull( ).sum(axis=1) # V95 ~ V125 vcol_names = [f'V{i}' for i in range(95, 126)] self.train_feature['sum_V95_V125'] = train[vcol_names].sum(axis=1) self.test_feature['sum_V95_V125'] = test[vcol_names].sum(axis=1) 
self.train_feature['null_sum_V95_V125'] = train[vcol_names].isnull( ).sum(axis=1) self.test_feature['null_sum_V95_V125'] = test[vcol_names].isnull( ).sum(axis=1) # V138 ~ V166 vcol_names = [f'V{i}' for i in range(138, 167)] self.train_feature['null_sum_V138_V166'] = train[ vcol_names].isnull().sum(axis=1) self.test_feature['null_sum_V138_V166'] = test[vcol_names].isnull( ).sum(axis=1) # V167 ~ V216 vcol_names = [f'V{i}' for i in range(167, 217)] self.train_feature['null_sum_V167_V216'] = train[ vcol_names].isnull().sum(axis=1) self.test_feature['null_sum_V167_V216'] = test[vcol_names].isnull( ).sum(axis=1) # V217 ~ V278 vcol_names = [f'V{i}' for i in range(217, 279)] self.train_feature['null_sum_V217_V278'] = train[ vcol_names].isnull().sum(axis=1) self.test_feature['null_sum_V217_V278'] = test[vcol_names].isnull( ).sum(axis=1) # V279 ~ V321 vcol_names = [f'V{i}' for i in range(279, 322)] self.train_feature['null_sum_V279_V321'] = train[ vcol_names].isnull().sum(axis=1) self.test_feature['null_sum_V279_V321'] = test[vcol_names].isnull( ).sum(axis=1) # V322 ~ V339 vcol_names = [f'V{i}' for i in range(322, 340)] self.train_feature['null_sum_V322_V339'] = train[ vcol_names].isnull().sum(axis=1) self.test_feature['null_sum_V322_V339'] = test[vcol_names].isnull( ).sum(axis=1) with timer("end"): self.train_feature.reset_index(drop=True, inplace=True) self.test_feature.reset_index(drop=True, inplace=True)
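
# The per-block statements in "agg V features" above are identical up to the
# column range; an equivalent, more compact way to generate the same columns
# (the generated names match the originals, e.g. 'sum_V1_V11'):
for lo, hi in [(1, 11), (12, 34), (35, 52), (53, 74), (75, 94), (95, 125)]:
    vcol_names = [f'V{i}' for i in range(lo, hi + 1)]
    self.train_feature[f'sum_V{lo}_V{hi}'] = train[vcol_names].sum(axis=1)
    self.test_feature[f'sum_V{lo}_V{hi}'] = test[vcol_names].sum(axis=1)
    self.train_feature[f'null_sum_V{lo}_V{hi}'] = train[vcol_names].isnull().sum(axis=1)
    self.test_feature[f'null_sum_V{lo}_V{hi}'] = test[vcol_names].isnull().sum(axis=1)
# these blocks get only the null-count feature, not the row sum
for lo, hi in [(138, 166), (167, 216), (217, 278), (279, 321), (322, 339)]:
    vcol_names = [f'V{i}' for i in range(lo, hi + 1)]
    self.train_feature[f'null_sum_V{lo}_V{hi}'] = train[vcol_names].isnull().sum(axis=1)
    self.test_feature[f'null_sum_V{lo}_V{hi}'] = test[vcol_names].isnull().sum(axis=1)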