Example #1
0
def worker(data: Table, learner, state: TaskState):
    """Run backward elimination over the attributes and return the trace.

    The first trace entry is the model fitted on all attributes. Each
    following step drops one attribute from the previously selected model:
    every subset with one attribute fewer is fitted and the one whose
    ``ll_ratio_log2p()`` is largest is appended to the trace. Progress is
    reported through ``state.set_progress_value`` after every step.

    Returns a list of ``Result`` objects, one per model size.
    """
    # Irregular input is filtered out by the widget before we get here.
    time_var, event_var = get_survival_endpoints(data.domain)
    endpoint_names = [time_var.name, event_var.name]

    def fit_cox_models(attrs_combinations):
        # Fit one model per attribute subset; the endpoints are always kept.
        fitted = (
            learner(data[:, subset + endpoint_names])
            for subset in attrs_combinations
        )
        return [Result(model.ll_ratio_log2p(), model) for model in fitted]

    all_attrs = list(data.domain.attributes)
    n_attrs = len(all_attrs)
    progress_steps = iter(np.linspace(0, 100, n_attrs))

    _trace = fit_cox_models([all_attrs])
    while len(_trace) != n_attrs:
        remaining = list(_trace[-1].model.domain.attributes)

        if len(remaining) > 1:
            candidates = list(
                map(list,
                    itertools.combinations(remaining, len(remaining) - 1)))
        else:
            candidates = [remaining]

        best = max(fit_cox_models(candidates),
                   key=lambda candidate: candidate.log2p)
        _trace.append(best)
        state.set_progress_value(next(progress_steps))
    return _trace
Example #2
0
def stratify(stratify_on: ContinuousVariable, splitting_criteria: int,
             domain: Domain, callback, data: Table):
    """Split rows into two groups around a cutoff of ``stratify_on``.

    ``data`` is first transformed into ``domain``; the values to split are
    then obtained from ``stratify_on.compute_value``. The cutoff is the
    median, the mean, or the value returned by
    ``cutoff_by_log_rank_optimization``, depending on ``splitting_criteria``.

    Returns an integer array holding 1 where the value is strictly greater
    than the cutoff and 0 elsewhere.

    Raises ``ValueError`` for an unrecognised ``splitting_criteria``.
    """
    transformed = data.transform(domain)
    values = stratify_on.compute_value(transformed)

    def cutoff_from(statistic):
        # With at most two distinct values the statistic is taken over the
        # distinct values rather than over all rows.
        distinct = np.unique(values)
        return statistic(distinct if len(distinct) <= 2 else values)

    if splitting_criteria == SplittingCriteria.Median:
        threshold = cutoff_from(np.median)
    elif splitting_criteria == SplittingCriteria.Mean:
        threshold = cutoff_from(np.mean)
    elif splitting_criteria == SplittingCriteria.LogRankTest:
        time_var, event_var = get_survival_endpoints(transformed.domain)
        durations, _ = transformed.get_column_view(time_var)
        events, _ = transformed.get_column_view(event_var)
        threshold = cutoff_by_log_rank_optimization(
            durations, events, callback, values)
    else:
        raise ValueError('Unknown splitting criteria')

    return (values > threshold).astype(int)
Example #3
0
    def setUp(self) -> None:
        """Create both widgets and feed the Kaplan-Meier widget survival data.

        The toy dataset is passed through the "as survival data" widget
        (time and event columns selected through its combo boxes) and the
        resulting output is sent to the widget under test.
        """
        here = os.path.dirname(__file__)
        self.test_data_path = os.path.join(here, 'datasets')

        # create widgets
        as_survival = self.as_survival = self.create_widget(OWAsSurvivalData)
        km_widget = self.widget = self.create_widget(OWKaplanMeier)

        # handle survival data
        self.send_signal(as_survival.Inputs.data,
                         Table(f'{self.test_data_path}/toy_example.tab'))
        simulate.combobox_activate_item(
            as_survival.controls.time_var,
            as_survival._data.columns.Time.name)
        simulate.combobox_activate_item(
            as_survival.controls.event_var,
            as_survival._data.columns.Event.name)
        survival_table = self.get_output(as_survival.Outputs.data)
        self.send_signal(km_widget.Inputs.data, survival_table)

        # check survival data
        domain = km_widget.data.domain
        time_var, event_var = get_survival_endpoints(domain)
        self.assertEqual(time_var.name, 'Time')
        self.assertEqual(event_var.name, 'Event')
        self.assertIn(time_var, domain.class_vars)
        self.assertIn(event_var, domain.class_vars)

        # check if missing data detected
        self.assertTrue(km_widget.Warning.missing_values_detected.is_shown())

        km_widget.auto_commit = True

        # Give the view box an explicit size; without it
        # ViewBox.mapSceneToView fails with
        # numpy.linalg.LinAlgError: Singular matrix
        km_widget.graph.getViewBox().resize(200, 200)
Example #4
0
    def fit(self, data):
        """Fit a ``CoxPHFitter`` on ``data`` and wrap it in a model.

        Raises ``ValueError`` when the domain of ``data`` does not contain
        survival endpoints. The fitter is configured from
        ``self.params['kwargs']``.
        """
        domain = data.domain
        if not contains_survival_endpoints(domain):
            raise ValueError(MISSING_SURVIVAL_DATA)
        time_var, event_var = get_survival_endpoints(domain)

        # Build the frame first, then construct and fit the estimator.
        frame = to_data_frame(data)
        fitted = CoxPHFitter(**self.params['kwargs']).fit(
            frame,
            duration_col=time_var.name,
            event_col=event_var.name,
        )
        return CoxRegressionModel(fitted)
    def compute_score(self, results):
        """Return the concordance index of ``results`` as a one-item list.

        The time and event columns are located in ``results.actual`` by the
        position of the survival endpoints within ``domain.class_vars``.
        """
        domain = results.domain
        time_var, event_var = get_survival_endpoints(domain)
        class_vars = domain.class_vars

        observed_times = results.actual[:, class_vars.index(time_var)]
        # The predictions are negated before scoring.
        # NOTE(review): presumably a higher prediction means higher risk,
        # i.e. shorter survival - confirm against the model's predict().
        negated_predictions = -results.predicted
        observed_events = results.actual[:, class_vars.index(event_var)]

        return [
            concordance_index(observed_times, negated_predictions,
                              observed_events)
        ]
    def fit(self, data):
        """Fit a ``CoxPHFitter`` on the complete rows of ``data``.

        Metas are left out of the frame, rows containing missing values are
        dropped, and both endpoint columns are cast to float before fitting.

        Raises ``ValueError`` (with ``self.learner_adequacy_err_msg``) when
        the domain of ``data`` does not contain survival endpoints.
        """
        domain = data.domain
        if not contains_survival_endpoints(domain):
            raise ValueError(self.learner_adequacy_err_msg)
        time_var, event_var = get_survival_endpoints(domain)
        duration_col, event_col = time_var.name, event_var.name

        frame = table_to_frame(data, include_metas=False).dropna(axis=0)
        for column in (duration_col, event_col):
            frame[column] = frame[column].astype(float)

        fitted = CoxPHFitter(**self.params['kwargs']).fit(
            frame, duration_col=duration_col, event_col=event_col)
        return CoxRegressionModel(fitted)
Example #7
0
def stratify(stratify_on: ContinuousVariable, splitting_criteria: int, callback, data: Table):
    """Split rows into two groups around a cutoff of ``stratify_on``.

    The values to split come from ``stratify_on.compute_value(data)``. The
    cutoff is their median, their mean, or the value returned by
    ``cutoff_by_log_rank_optimization``, depending on ``splitting_criteria``.

    Returns an integer array holding 1 where the value is strictly greater
    than the cutoff and 0 elsewhere.

    Raises ``ValueError`` for an unrecognised ``splitting_criteria``.
    """
    values = stratify_on.compute_value(data)

    if splitting_criteria == SplittingCriteria.Median:
        threshold = np.median(values)
    elif splitting_criteria == SplittingCriteria.Mean:
        threshold = np.mean(values)
    elif splitting_criteria == SplittingCriteria.LogRankTest:
        time_var, event_var = get_survival_endpoints(data.domain)
        durations, _ = data.get_column_view(time_var)
        events, _ = data.get_column_view(event_var)
        threshold = cutoff_by_log_rank_optimization(
            durations, events, callback, values)
    else:
        raise ValueError('Unknown splitting criteria')

    return (values > threshold).astype(int)
    def set_data(self, data: Table):
        """Reset the widget for a new input table and start the worker.

        The current context is closed and the selection, the cached worker
        covariates and the item model are cleared first. When ``data`` is
        falsy nothing else happens; otherwise the table is stored, the
        name-to-variable lookup is rebuilt, the context is reopened and the
        worker is started with the names of the survival endpoints.
        """
        self.closeContext()
        self.selected_attrs = []
        self.covariates_from_worker_result = []
        self.model.clear()
        self.model.resetSorting()

        if not data:
            return

        self.data = data
        domain = self.data.domain
        self.attr_name_to_variable = {
            variable.name: variable for variable in domain.attributes
        }

        self.openContext(data)

        # Only the endpoint names are stored and handed to the worker.
        time_endpoint, event_endpoint = get_survival_endpoints(domain)
        self.time_var = time_endpoint.name
        self.event_var = event_endpoint.name
        self.start(
            worker,
            self.data,
            self.covariates,
            self.time_var,
            self.event_var,
        )
    def event_var(self):
        """Return the event endpoint of the current data, or None without data."""
        if self.data:
            # The endpoints come back as a (time, event) pair.
            _time, event = get_survival_endpoints(self.data.domain)
            return event
        return None
    def time_var(self):
        """Return the time endpoint of the current data, or None without data."""
        if self.data:
            # The endpoints come back as a (time, event) pair.
            time, _event = get_survival_endpoints(self.data.domain)
            return time
        return None