Esempio n. 1
0
    def convert_dataset(dataset):
        """Split a dataset into encoded features, labels and a timed copy.

        Parameters
        ----------
        dataset : pandas.DataFrame
            Must contain a ``"status"`` and a ``"time"`` column; every other
            column is treated as a feature. The frame is not modified.

        Returns
        -------
        data_x_numeric
            Result of ``OneHotEncoder().fit_transform`` applied to the feature
            columns, after object-dtype columns were cast to ``category``.
            Presumably a DataFrame (it is copied and given a new column below)
            -- TODO confirm against the encoder in use.
        pd_y_values : pandas.DataFrame
            Columns ``["time", "event"]`` where ``"event"`` is ``"status"``
            cast to bool; index labels are mapped through ``int``.
        test_timed_data
            Copy of ``data_x_numeric`` with an additional ``"time"`` column.
        """
        # Explicit copy of the feature columns: assigning into a bare ``.loc``
        # selection of ``dataset`` triggers pandas' SettingWithCopyWarning.
        is_feature = (dataset.columns != "status") & (dataset.columns != "time")
        features = dataset.loc[:, is_feature].copy()

        # Convert string (object) columns to categorical type for the encoder.
        for col in features.columns:
            if str(features[col].dtype) == "object":
                features[col] = features[col].astype("category")
        data_x_numeric = OneHotEncoder().fit_transform(features)

        # ``rename`` returns a new frame, so the cast below never touches
        # ``dataset``.
        # NOTE(review): ``index=int`` rewrites the index labels; if the original
        # labels are not already integers, the label-aligned ``"time"``
        # assignment further down may produce NaN -- confirm with the data used.
        pd_y_values = dataset[["time", "status"]].rename(
            index=int, columns={"status": "event"})
        pd_y_values["event"] = pd_y_values["event"].astype("bool")

        # Test on sorted input data: same features plus the observed time.
        test_timed_data = data_x_numeric.copy()
        test_timed_data["time"] = pd_y_values["time"]

        return data_x_numeric, pd_y_values, test_timed_data
Esempio n. 2
0
    def convert_dataset(dataset):
        """Split a dataset into encoded features, labels and a timed copy.

        Parameters
        ----------
        dataset : pandas.DataFrame
            Must contain the feature columns ``horTh``, ``age``, ``menostat``,
            ``tsize``, ``tgrade``, ``pnodes``, ``progrec`` and ``estrec`` as
            well as ``"time"`` and ``"cens"``. The frame is not modified.

        Returns
        -------
        data_x_numeric
            Result of ``OneHotEncoder().fit_transform`` applied to the feature
            columns, after object-dtype columns were cast to ``category``.
            Presumably a DataFrame (it is copied and given a new column below)
            -- TODO confirm against the encoder in use.
        pd_y_values : pandas.DataFrame
            Columns ``["time", "event"]`` where ``"event"`` is ``"cens"`` cast
            to bool; index labels are mapped through ``int``.
        test_timed_data
            Copy of ``data_x_numeric`` with an additional ``"time"`` column.
        """
        feature_columns = [
            "horTh", "age", "menostat", "tsize", "tgrade", "pnodes", "progrec",
            "estrec"
        ]

        # Work on a private copy so the caller's frame is left untouched.
        frame = dataset.copy()

        # Convert string (object) columns to categorical type. Whole-column
        # assignment is used on purpose: ``frame.loc[:, col] = ...`` writes
        # into the existing column in place on pandas >= 2.0 and can leave the
        # dtype as object.
        for col in frame.columns:
            if str(frame[col].dtype) == "object":
                frame[col] = frame[col].astype("category")

        data_x_numeric = OneHotEncoder().fit_transform(frame[feature_columns])

        # ``rename`` returns a new frame, so the cast below is applied to an
        # independent object.
        # NOTE(review): ``index=int`` rewrites the index labels; if the original
        # labels are not already integers, the label-aligned ``"time"``
        # assignment further down may produce NaN -- confirm with the data used.
        pd_y_values = frame[["time", "cens"]].rename(
            index=int, columns={"cens": "event"})
        pd_y_values["event"] = pd_y_values["event"].astype("bool")

        # Test on sorted input data: same features plus the observed time.
        test_timed_data = data_x_numeric.copy()
        test_timed_data["time"] = pd_y_values["time"]

        return data_x_numeric, pd_y_values, test_timed_data