Beispiel #1
0
    def __init__(self, path, **kwargs):
        """Load failure / right-censored data from an Excel workbook.

        The first column of the sheet is read as failure values and, when a
        second column exists, it is read as right-censored values. Both are
        passed through ``removeNaNs``. Columns beyond the second are ignored
        and a warning is printed.

        Args:
            path: Workbook location, forwarded to ``pd.read_excel`` as ``io``.
            **kwargs: Additional keyword arguments forwarded to
                ``pd.read_excel``.
        """
        sheet = pd.read_excel(io=path, **kwargs)
        columns = sheet.columns
        n_columns = len(columns)

        raw_failures = sheet[columns[0]].to_numpy()
        self.failures = removeNaNs(raw_failures)

        if n_columns <= 1:
            # Only failures were supplied.
            self.right_censored = None
            self.__df = pd.DataFrame(
                {"failures": self.failures}, columns=["failures"]
            )
        else:
            raw_censored = sheet[columns[1]].to_numpy()
            self.right_censored = removeNaNs(raw_censored)

            fail_col = list(self.failures)
            cens_col = list(self.right_censored)
            # Pad the shorter column with empty strings so both columns of
            # the dataframe have the same length (padding by 0 is a no-op).
            target = max(len(fail_col), len(cens_col))
            fail_col += [""] * (target - len(fail_col))
            cens_col += [""] * (target - len(cens_col))

            self.__df = pd.DataFrame(
                {"failures": fail_col, "right censored": cens_col},
                columns=["failures", "right censored"],
            )

        if n_columns > 2:
            colorprint(
                "WARNING: xlsx_to_FR assumes the first two columns in the excel file are 'failures' and 'right censored'. All other columns have been ignored",
                text_color="red",
            )
Beispiel #2
0
    def __init__(self, path, censor_code_in_xlsx=None, failure_code_in_xlsx=None, censor_code_in_XCN='C', failure_code_in_XCN='F', **kwargs):
        """Load XCN (event time, censoring code, count) data from an Excel workbook.

        Column 1 is read as X (event times), column 2 as C (censoring codes)
        and column 3, when present, as N (number of items at each event
        time). If there is no third column, N is taken to be all ones.
        Further columns are ignored and a warning is printed.

        Args:
            path: Workbook location, forwarded to ``pd.read_excel`` as ``io``.
            censor_code_in_xlsx: Value in the second column that marks a
                censored item. Strings are compared case-insensitively.
            failure_code_in_xlsx: Value in the second column that marks a
                failure. Strings are compared case-insensitively.
            censor_code_in_XCN: Censoring code passed to ``FR_to_XCN`` for
                the resulting ``C`` attribute.
            failure_code_in_XCN: Failure code passed to ``FR_to_XCN`` for
                the resulting ``C`` attribute.
            **kwargs: Additional keyword arguments forwarded to
                ``pd.read_excel``.

        Raises:
            ValueError: If the second column holds more than two unique
                codes, holds a code that is neither user-specified nor one
                of the defaults, or if the column lengths differ.
        """
        # Codes understood without any user input; compared against the
        # upper-cased cell value.
        default_failure_codes = ('F', 'FAIL', 'FAILED', 'FAILURE', 0)
        default_censor_codes = ('R', 'RC', 'RIGHT CENS', 'RIGHT CENSORED', 'C', 'CENSORED', 'CENS', 'S', 'SUSP', 'SUSPENSION', 'SUSPENDED', 'UF', 'UNFAILED', 'UNFAIL', 'NF', 'NO FAIL', 'NO FAILURE', 'NOT FAILED', 1)
        # Fixed codes used only for the intermediate XCN -> FR conversion.
        # These are the values the default arguments already send to
        # XCN_to_FR, so that step does not depend on the caller's output codes.
        canonical_failure, canonical_censor = 'F', 'C'

        df = pd.read_excel(io=path, **kwargs)
        cols = df.columns
        X = np.array(removeNaNs(list(df[cols[0]].to_numpy())))
        # to_list rather than to_numpy so that a column mixing strings and
        # numbers keeps each cell's own type.
        C0 = removeNaNs(df[cols[1]].to_list())
        C_upper = [item.upper() if isinstance(item, str) else item for item in C0]
        C_unique = np.unique(C_upper)
        if len(C_unique) > 2:
            raise ValueError('xlsx_to_XCN assumes the second column is C (censoring code). A maximum of 2 unique censoring codes are allowed. Within this column there were ' + str(len(C_unique)) + ' unique values: ' + str(C_unique))

        # upper() the user-supplied codes since they are compared with C_upper
        if isinstance(failure_code_in_xlsx, str):
            failure_code_in_xlsx = failure_code_in_xlsx.upper()
        if isinstance(censor_code_in_xlsx, str):
            censor_code_in_xlsx = censor_code_in_xlsx.upper()

        C_out = []
        for item in C_upper:
            # user-specified codes take priority over the defaults
            if item == failure_code_in_xlsx:
                C_out.append(canonical_failure)
            elif item == censor_code_in_xlsx:
                C_out.append(canonical_censor)
            elif item in default_failure_codes:
                C_out.append(canonical_failure)
            elif item in default_censor_codes:
                C_out.append(canonical_censor)
            else:
                raise ValueError('Unrecognised value in the second column of the xlsx file. xlsx_to_XCN assumes the second column is C (censoring code). Common values are used as defaults but the xlsx file contained unrecognised values. You can fix this by specifying the arguments censor_code_in_xlsx  and failure_code_in_xlsx.')
        C = np.array(C_out)

        if len(cols) > 2:
            N = removeNaNs(df[cols[2]].to_numpy())
        else:
            N = np.ones_like(X)  # if N is missing then it is assumed as all ones
        if len(cols) > 3:
            colorprint("WARNING: xlsx_to_XCN assumes the first three columns in the excel file are being used for 'X' (event times), 'C' (censoring codes), 'N' (number of items at each event time). All other columns have been ignored", text_color='red')
        if len(X) != len(C) or len(X) != len(N):
            raise ValueError('The lengths of the first 3 columns in the xlsx file do not match. This may be because some data is missing.')

        # This seemingly redundant round trip combines any duplicates which
        # were not correctly summarized in the input data.
        FR = XCN_to_FR(X=X, C=C, N=N)
        # The requested output codes are applied here, on the way back to XCN.
        XCN = FR_to_XCN(failures=FR.failures, right_censored=FR.right_censored, failure_code=failure_code_in_XCN, censor_code=censor_code_in_XCN)
        self.X = XCN.X
        self.C = XCN.C
        self.N = XCN.N
        Data = {'event time': self.X, 'censor code': self.C, 'number of events': self.N}
        self.__df = pd.DataFrame(data=Data, columns=['event time', 'censor code', 'number of events'])
Beispiel #3
0
    def __init__(self, path, **kwargs):
        """Load FNRN data from an Excel workbook.

        Columns 1 and 2 are read as 'failures' and 'number of failures'.
        When the sheet has four or more columns, columns 3 and 4 are read as
        'right censored' and 'number of right censored'; columns beyond the
        fourth are ignored and a warning is printed. The data is passed
        through ``FNRN_to_FR`` and ``FR_to_FNRN`` before being stored.

        Args:
            path: Workbook location, forwarded to ``pd.read_excel`` as ``io``.
            **kwargs: Additional keyword arguments forwarded to
                ``pd.read_excel``.

        Raises:
            ValueError: If the sheet has fewer than two columns or exactly
                three columns, or if a value column and its matching count
                column differ in length.
        """
        df = pd.read_excel(io=path, **kwargs)
        cols = df.columns
        n_cols = len(cols)
        # Reject layouts that cannot be interpreted up front instead of
        # failing later with an IndexError on a missing column.
        if n_cols < 2 or n_cols == 3:
            raise ValueError(
                "xlsx_to_FNRN requires either two columns ('failures', 'number of failures') or four columns ('failures', 'number of failures', 'right censored', 'number of right censored'). The excel file contained "
                + str(n_cols)
                + " column(s)."
            )

        failures = removeNaNs(df[cols[0]].to_numpy())
        num_failures = removeNaNs(df[cols[1]].to_numpy())
        if len(failures) != len(num_failures):
            raise ValueError("xlsx_to_FNRN assumes the first and second columns in the excel file are 'failures' and 'number of failures'. These must be the same length.")

        if n_cols == 2:
            right_censored = None
            num_right_censored = None
        else:
            right_censored = removeNaNs(df[cols[2]].to_numpy())
            num_right_censored = removeNaNs(df[cols[3]].to_numpy())
            if len(right_censored) != len(num_right_censored):
                raise ValueError("xlsx_to_FNRN assumes the third and fourth columns in the excel file are 'right censored' and 'number of right censored'. These must be the same length.")
        if n_cols > 4:
            colorprint("WARNING: xlsx_to_FNRN assumes the first four columns in the excel file are 'failures', 'number of failures', 'right censored', 'number of right censored'. All other columns have been ignored", text_color='red')

        # This seemingly redundant round trip combines any duplicates which
        # were not correctly summarized in the input data.
        FR = FNRN_to_FR(failures=failures, num_failures=num_failures, right_censored=right_censored, num_right_censored=num_right_censored)
        FNRN = FR_to_FNRN(failures=FR.failures, right_censored=FR.right_censored)
        self.failures = FNRN.failures
        self.num_failures = FNRN.num_failures
        self.right_censored = FNRN.right_censored
        self.num_right_censored = FNRN.num_right_censored

        # make the dataframe for printing and writing to excel
        if self.right_censored is not None:
            f, nf = list(self.failures), list(self.num_failures)
            rc, nrc = list(self.right_censored), list(self.num_right_censored)
            max_len = max(len(f), len(rc))
            # Pad the shorter pair of columns with empty strings so all four
            # columns have the same length (padding by 0 is a no-op).
            pad_f = [''] * (max_len - len(f))
            pad_rc = [''] * (max_len - len(rc))
            f.extend(pad_f)
            nf.extend(pad_f)
            rc.extend(pad_rc)
            nrc.extend(pad_rc)
            Data = {'failures': f, 'number of failures': nf, 'right censored': rc, 'number of right censored': nrc}
            self.__df = pd.DataFrame(Data, columns=['failures', 'number of failures', 'right censored', 'number of right censored'])
        else:
            Data = {'failures': self.failures, 'number of failures': self.num_failures}
            self.__df = pd.DataFrame(Data, columns=['failures', 'number of failures'])
Beispiel #4
0
    def __init__(
        self,
        path,
        censor_code_in_xlsx=None,
        failure_code_in_xlsx=None,
        censor_code_in_XCN="C",
        failure_code_in_XCN="F",
        **kwargs
    ):
        """Load XCN (event time, censoring code, count) data from an Excel workbook.

        Column 1 is read as X (event times), column 2 as C (censoring codes)
        and column 3, when present, as N (number of items at each event
        time). If there is no third column, N is taken to be all ones.
        Further columns are ignored and a warning is printed.

        Args:
            path: Workbook location, forwarded to ``pd.read_excel`` as ``io``.
            censor_code_in_xlsx: Value in the second column that marks a
                censored item. Strings are compared case-insensitively.
            failure_code_in_xlsx: Value in the second column that marks a
                failure. Strings are compared case-insensitively.
            censor_code_in_XCN: Censoring code passed to ``FR_to_XCN`` for
                the resulting ``C`` attribute.
            failure_code_in_XCN: Failure code passed to ``FR_to_XCN`` for
                the resulting ``C`` attribute.
            **kwargs: Additional keyword arguments forwarded to
                ``pd.read_excel``.

        Raises:
            ValueError: If the second column holds more than two unique
                codes, holds a code that is neither user-specified nor one
                of the defaults, or if the column lengths differ.
        """
        # Codes understood without any user input; compared against the
        # upper-cased cell value.
        default_failure_codes = ("F", "FAIL", "FAILED", "FAILURE", 0)
        default_censor_codes = (
            "R",
            "RC",
            "RIGHT CENS",
            "RIGHT CENSORED",
            "C",
            "CENSORED",
            "CENS",
            "S",
            "SUSP",
            "SUSPENSION",
            "SUSPENDED",
            "UF",
            "UNFAILED",
            "UNFAIL",
            "NF",
            "NO FAIL",
            "NO FAILURE",
            "NOT FAILED",
            1,
        )
        # Fixed codes used only for the intermediate XCN -> FR conversion.
        # These are the values the default arguments already send to
        # XCN_to_FR, so that step does not depend on the caller's output codes.
        canonical_failure, canonical_censor = "F", "C"

        df = pd.read_excel(io=path, **kwargs)
        cols = df.columns
        X = np.array(removeNaNs(list(df[cols[0]].to_numpy())))
        # C0 needs to be to_list not to_numpy in case of mixtures of strings
        # and numbers which numpy would convert all to strings
        C0 = removeNaNs(df[cols[1]].to_list())
        C_upper = [item.upper() if isinstance(item, str) else item for item in C0]
        C_unique = np.unique(C_upper)
        if len(C_unique) > 2:
            raise ValueError(
                "xlsx_to_XCN assumes the second column is C (censoring code). A maximum of 2 unique censoring codes are allowed. Within this column there were "
                + str(len(C_unique))
                + " unique values: "
                + str(C_unique)
            )

        # need to upper() the input since we are comparing with C_upper
        if isinstance(failure_code_in_xlsx, str):
            failure_code_in_xlsx = failure_code_in_xlsx.upper()
        if isinstance(censor_code_in_xlsx, str):
            censor_code_in_xlsx = censor_code_in_xlsx.upper()

        C_out = []
        for item in C_upper:
            # user-specified codes take priority over the defaults
            if item == failure_code_in_xlsx:
                C_out.append(canonical_failure)
            elif item == censor_code_in_xlsx:
                C_out.append(canonical_censor)
            elif item in default_failure_codes:
                C_out.append(canonical_failure)
            elif item in default_censor_codes:
                C_out.append(canonical_censor)
            else:
                raise ValueError(
                    "Unrecognised value in the second column of the xlsx file. xlsx_to_XCN assumes the second column is C (censoring code). Common values are used as defaults but the xlsx file contained unrecognised values. You can fix this by specifying the arguments censor_code_in_xlsx  and failure_code_in_xlsx."
                )
        C = np.array(C_out)

        if len(cols) > 2:
            N = removeNaNs(df[cols[2]].to_numpy())
        else:
            N = np.ones_like(X)  # if N is missing then it is assumed as all ones
        if len(cols) > 3:
            colorprint(
                "WARNING: xlsx_to_XCN assumes the first three columns in the excel file are being used for 'X' (event times), 'C' (censoring codes), 'N' (number of items at each event time). All other columns have been ignored",
                text_color="red",
            )
        if len(X) != len(C) or len(X) != len(N):
            raise ValueError(
                "The lengths of the first 3 columns in the xlsx file do not match. This may be because some data is missing."
            )

        # This seemingly redundant round trip combines any duplicates which
        # were not correctly summarized in the input data.
        FR = XCN_to_FR(X=X, C=C, N=N)
        # The requested output codes are applied here, on the way back to XCN.
        XCN = FR_to_XCN(
            failures=FR.failures,
            right_censored=FR.right_censored,
            failure_code=failure_code_in_XCN,
            censor_code=censor_code_in_XCN,
        )
        self.X = XCN.X
        self.C = XCN.C
        self.N = XCN.N
        Data = {"event time": self.X, "censor code": self.C, "number of events": self.N}
        self.__df = pd.DataFrame(
            data=Data, columns=["event time", "censor code", "number of events"]
        )