def test_multivariate_logrank_on_dd_dataset():
    """
    Compare the multivariate log-rank test statistic on the dd dataset
    with a fixed reference value.

    R snippet kept as the reference computation (presumably the origin of
    the expected statistic asserted below):

        library('survival')
        dd = read.csv('~/code/lifelines/lifelines/datasets/dd.csv')
        results = survdiff(Surv(duration, observed)~regime, data=dd, rho=0)
        results[5]
    """
    expected_statistic = 322.5991
    tolerance = 0.0001

    frame = load_dd()
    durations = frame["duration"]
    groups = frame["regime"]
    events = frame["observed"]

    # Argument order is (durations, groups, event indicator).
    outcome = stats.multivariate_logrank_test(durations, groups, events)

    assert abs(outcome.test_statistic - expected_statistic) < tolerance
from lifelines.datasets import load_dd from lifelines import KaplanMeierFitter from matplotlib import pyplot as plt data = load_dd() from lifelines.datasets import load_rossi from lifelines import CoxPHFitter from infra import * import pandas FILTER_IN = "filter in" FILTER_OUT = "filter out" OS_FIELDS = ["_OS", "_OS_IND"] pheno_start = 1 pheno_limit = 135 def cox_phenotype(test_independently, filtered_out, filtered_in, filter_type, filter_na_by_rows, phenotype_file_name, survival_file_name): phenotype_dataset = load_phenotype_data( phenotype_file_name=phenotype_file_name, phenotype_list_path=None) survival_dataset = load_survival_data(survival_file_name, survival_list_path=None) pheno_survival_integrated = {} for cur_pheno in phenotype_dataset[1:]: pheno_survival_integrated[ cur_pheno[0]] = cur_pheno[pheno_start:pheno_limit] for cur_survival in survival_dataset[1:]: if pheno_survival_integrated.has_key(cur_survival[0]): pheno_survival_integrated[ cur_survival[0]] = pheno_survival_integrated[