def convert_dataset(dataset):
    """Split *dataset* into one-hot encoded features and an outcome frame.

    Every column other than ``"status"`` and ``"time"`` is treated as a
    feature. Feature columns whose dtype prints as ``"object"`` are cast to
    ``category`` before being handed to ``OneHotEncoder``.

    Args:
        dataset: DataFrame holding at least the columns ``"status"`` and
            ``"time"``.

    Returns:
        A 3-tuple ``(features, outcome, features_with_time)``:

        * ``features`` -- the result of ``OneHotEncoder().fit_transform``.
        * ``outcome`` -- frame with columns ``["time", "event"]`` where
          ``event`` is ``status`` cast to bool; its index labels have been
          passed through ``int``.
        * ``features_with_time`` -- a copy of ``features`` with an added
          ``"time"`` column taken from ``outcome``.
    """
    # NOTE(review): the encoder output is used like a DataFrame below
    # (.copy() and column assignment), so this is presumably the
    # scikit-survival OneHotEncoder rather than scikit-learn's -- confirm.

    # Keep every column that is neither the event indicator nor the time.
    feature_mask = (dataset.columns != "status") & (dataset.columns != "time")
    features = dataset.loc[:, feature_mask]

    # Cast string (object) columns to categorical in a single astype call.
    object_columns = [
        name
        for name in features.columns
        if str(features[name].dtype) == "object"
    ]
    features = features.astype({name: "category" for name in object_columns})
    encoded = OneHotEncoder().fit_transform(features)

    # Outcome frame: boolean event flag plus the observed time.
    outcome = dataset[["status", "time"]].reindex(columns=["status", "time"])
    outcome["status"] = outcome["status"].astype("bool")
    pd_y_values = outcome.rename(
        index=int, columns={"status": "event"}
    ).reindex(columns=["time", "event"])

    # Column assignment aligns on index labels; pd_y_values carries the
    # int-converted labels produced by rename(index=int) above.
    encoded_with_time = encoded.copy()
    encoded_with_time["time"] = pd_y_values["time"]

    return encoded, pd_y_values, encoded_with_time
def convert_dataset(dataset):
    """Split *dataset* into one-hot encoded features and an outcome frame.

    The feature columns are fixed: ``horTh``, ``age``, ``menostat``,
    ``tsize``, ``tgrade``, ``pnodes``, ``progrec`` and ``estrec``. Those
    whose dtype prints as ``"object"`` are cast to ``category`` on a private
    copy before encoding, so the caller's ``dataset`` is left untouched.

    Args:
        dataset: DataFrame holding the feature columns listed above plus
            ``"time"`` and ``"cens"``.

    Returns:
        A 3-tuple ``(features, outcome, features_with_time)``:

        * ``features`` -- the result of ``OneHotEncoder().fit_transform``
          on the selected feature columns.
        * ``outcome`` -- frame with columns ``["time", "event"]`` where
          ``event`` is ``cens`` cast to bool; its index labels have been
          passed through ``int``.
        * ``features_with_time`` -- a copy of ``features`` with an added
          ``"time"`` column taken from ``outcome``.

    Raises:
        KeyError: if any required column is missing from ``dataset``.
    """
    # NOTE(review): the encoder output is used like a DataFrame below
    # (.copy() and column assignment), so this is presumably the
    # scikit-survival OneHotEncoder rather than scikit-learn's -- confirm.
    feature_columns = [
        "horTh",
        "age",
        "menostat",
        "tsize",
        "tgrade",
        "pnodes",
        "progrec",
        "estrec",
    ]

    # Work on a copy: assigning through ``dataset.loc[:, col] = ...`` would
    # write into the caller's frame, and on recent pandas that form sets the
    # values in place and keeps the object dtype, losing the conversion.
    features = dataset[feature_columns].copy()
    object_columns = [
        name
        for name in feature_columns
        if str(features[name].dtype) == "object"
    ]
    features = features.astype({name: "category" for name in object_columns})
    data_x_numeric = OneHotEncoder().fit_transform(features)

    # astype/rename both return new frames, so nothing is assigned into a
    # slice of ``dataset``. Resulting column order is ["time", "event"].
    pd_y_values = (
        dataset[["time", "cens"]]
        .astype({"cens": "bool"})
        .rename(index=int, columns={"cens": "event"})
    )

    # Column assignment aligns on index labels; pd_y_values carries the
    # int-converted labels produced by rename(index=int) above.
    test_timed_data = data_x_numeric.copy()
    test_timed_data["time"] = pd_y_values["time"]

    return data_x_numeric, pd_y_values, test_timed_data