Python KaplanMeierFitter.plotの例、lifelines.KaplanMeierFitter.plot Pythonの例

コード例 #1

0

ファイルを表示

        def survival(self):
            """Fit a Kaplan-Meier model on patients not infected at baseline.

            Every value of ``self.patients`` is read positionally: index 2 is
            the infected-at-baseline flag, index 3 tells whether the infection
            event took place and index 4 is the length of time before the event
            (either time to infection or time to discharge).

            What happens with the fitted model depends on ``self.KM``:
            ``"table"`` prints the survival function, ``"plot"`` draws and shows
            the curve, ``"median"`` prints ``kmf.median_``; any other value does
            nothing.
            """
            durations = []
            events = []
            for record in self.patients.values():
                # Patients already infected at baseline are left out.
                if record[2] == 1:
                    continue
                durations.append(record[4])
                events.append(record[3])

            # Kaplan-Meier estimator from the lifelines package.
            kmf = KaplanMeierFitter()
            kmf.fit(numpy.array(durations), event_observed=numpy.array(events))

            # print(...) with a single argument behaves the same on Python 2
            # and Python 3.
            if self.KM == "table":
                print(kmf.survival_function_)
            elif self.KM == "plot":
                kmf.plot()
                plt.show()
            elif self.KM == "median":
                print(kmf.median_)

コード例 #2

0

ファイルを表示

    def plot_km_estimates(self, index):
        """Draw Kaplan-Meier estimates for the subgroup and its complement.

        Both curves share one axes on figure number ``index + 1``. The figure
        is titled from ``self.string_repr`` and saved as
        ``<string_repr[0]>_model``.
        """
        rcParams['figure.figsize'] = 15, 6
        plt.figure(index + 1)
        axes = plt.subplot(111)

        subgroup_fitter = KaplanMeierFitter()
        complement_fitter = KaplanMeierFitter()

        # Subgroup curve.
        subgroup = self.sub_group
        subgroup_fitter.fit(
            subgroup['survival_times'],
            subgroup['events'],
            label='KM estimates for subgroup',
            alpha=UserInputs.kmf_alpha,
        )
        subgroup_fitter.plot(ax=axes)

        # Complement curve, drawn on the same axes.
        complement = self.sub_group_complement
        complement_fitter.fit(
            complement['survival_times'],
            complement['events'],
            label='KM estimates for complement',
            alpha=UserInputs.kmf_alpha,
        )
        complement_fitter.plot(ax=axes)

        plt.title(self.string_repr[0] + ': ' + self.string_repr[1])
        plt.xlabel('Time')
        plt.ylabel('Survival probability')

        plt.savefig(self.string_repr[0] + '_model')

コード例 #3

0

ファイルを表示

ファイル: survival.py プロジェクト: ScienceDuck/TCGA-analysis

def survival_analysis(dataframe, grouping, years = 5):
    """Plot Kaplan-Meier curves per group, truncated at ``years`` years.

    Rows with a null value in ``grouping``, ``'_OS'`` or ``'_EVENT'`` are
    dropped. Follow-up longer than ``years * 365`` is cut to that limit and
    its event flag is set to 0; all other rows keep their ``'_OS'`` and
    ``'_EVENT'`` values. One curve per distinct value of ``grouping`` is drawn
    on a shared axes (groups in sorted order) and the figure is shown.

    Parameters:
        dataframe: pandas DataFrame with the columns ``'_OS'``, ``'_EVENT'``
            and ``grouping``.
        grouping: name of the column whose values define the groups.
        years: length of the analysis window in years (365 days each).
    """
    # Remove patients with null values; copy so the new columns are written
    # to an independent frame rather than to a possible view of the input.
    cohort = dataframe.dropna(subset=[grouping, '_OS', '_EVENT']).copy()

    # Limit the analysis to the number of years specified. Series.where is
    # used instead of chained indexing (frame[col][mask] = ...), which may
    # assign to a temporary copy and silently leave the frame unchanged.
    maxtime = years * 365
    within_window = cohort['_OS'] <= maxtime
    cohort['survival'] = cohort['_OS'].where(within_window, maxtime).astype(float)
    cohort['event'] = cohort['_EVENT'].where(within_window, 0).astype(float)

    grouped_data = cohort.groupby(grouping)

    # Plot one survival curve per group on the same axes.
    kmf = KaplanMeierFitter()
    ax = plt.subplot(111)
    for group in sorted(grouped_data.groups.keys()):
        data = grouped_data.get_group(group)
        kmf.fit(data['survival'], data['event'], label=group)
        kmf.plot(ax=ax, show_censors=True)

    plt.show()

コード例 #4

0

ファイルを表示

    def plot_kaplan_meier(self, column, value):
        """Plot Kaplan-Meier survival curves of the cleaned METABRIC clinical data.

        Rows where ``column`` equals ``value`` are compared with all rows where
        it does not; both curves are drawn on one axes together with
        at-risk counts.

        Args:
            column (str): Column of the METABRIC data corresponding to a
                patient attribute, such as HER2 receptor status.
            value (str or int): Value of ``column`` that is the point of
                comparison, e.g. ``'negative'`` for a HER2 receptor column.
        """
        matching_df = self.data[self.data[column] == value]
        other_df = self.data[self.data[column] != value]

        # Curve for rows equal to `value`.
        matching_fitter = KaplanMeierFitter()
        matching_fitter.fit(
            matching_df.overall_survival_months,
            event_observed=matching_df['death_from_cancer'],
            label=value,
        )
        ax = matching_fitter.plot()

        # Curve for every other row, on the same axes.
        other_fitter = KaplanMeierFitter()
        other_fitter.fit(
            other_df.overall_survival_months,
            event_observed=other_df['death_from_cancer'],
            label=f'not {value}',
        )
        ax = other_fitter.plot(ax=ax)

        add_at_risk_counts(matching_fitter, other_fitter, ax=ax)
        ax.set_ylim([0.0, 1.0])
        ax.set_xlabel('Timeline (Months)')
        ax.set_title(f'Kaplan Meier plot in months of {column} variable')
        plt.tight_layout()
        plt.show()

コード例 #5

0

ファイルを表示

ファイル: cgpb_finder.py プロジェクト: Abraxos/fim_cancer

	def __KM_analysis(self,duration_table,expressed_array,unexpressed_array,freq_set):
		"""Compare survival of expressed vs. unexpressed samples with a log-rank test.

		The first row of ``duration_table`` is skipped. For every other row,
		``row[0]`` decides the group (membership in ``unexpressed_array`` is
		checked first, then ``expressed_array``), ``row[1]`` is the duration
		and ``row[2]`` the event flag; rows with ``"NA"`` in either field are
		ignored. When the log-rank p-value is below .0006 both Kaplan-Meier
		curves are plotted and shown, titled with ``freq_set``.

		Returns the log-rank p-value.
		"""
		expressed_T, expressed_C = [], []
		unexpressed_T, unexpressed_C = [], []

		for idx, row in enumerate(duration_table):
			if idx == 0:
				continue
			# Pick the destination lists; rows in neither group are skipped.
			if row[0] in unexpressed_array:
				times, flags = unexpressed_T, unexpressed_C
			elif row[0] in expressed_array:
				times, flags = expressed_T, expressed_C
			else:
				continue
			if row[1] == "NA" or row[2] == "NA":
				continue
			times.append(float(row[1]))
			flags.append(int(row[2]))

		results = logrank_test(expressed_T, unexpressed_T, expressed_C, unexpressed_C, alpha=.95 )
		p_value = results.p_value
		if p_value < .0006:
			ax = plt.subplot(111)
			kmf = KaplanMeierFitter()
			curves = (
				(expressed_T, expressed_C, "Satisfying"),
				(unexpressed_T, unexpressed_C, "None-Satisfying"),
			)
			for times, flags, name in curves:
				kmf.fit(times, event_observed=flags, label=name)
				kmf.plot(ax=ax, ci_force_lines=False)
			plt.ylim(0,1)
			plt.title("Lifespans ("+str(freq_set)+")")
			plt.show()
		return results.p_value

コード例 #6

0

ファイルを表示

ファイル: main.py プロジェクト: aqzas/AML_cookbook

def single_submit(form):
    """Handle the single-gene form and render ``single.html``.

    On a failed validation the template is rendered with the form errors.
    Otherwise the data for the chosen database/gene is read, the first
    ``Low`` percent and the last ``High`` percent of its rows (at least one
    row each) are fitted as two Kaplan-Meier curves, the plot is written to
    ``static/test.png`` and the template is rendered with the image name and
    the mean/std values returned by ``ReadData``.
    """
    if not form.validate_on_submit():
        return render_template("single.html",
                               form=form,
                               err=form.errors)

    database = form.DataBase.data
    gene = form.GeneName.data
    low_percent = int(form.Low.data)
    high_percent = int(form.High.data)

    data, survival, mean, std = ReadData(database, gene)
    static = {'mean': mean, 'std': std}

    # Convert the percentages into row counts, never fewer than one row.
    num = len(survival)
    low_count = max(int(num * low_percent / 100), 1)
    high_count = max(int(num * high_percent / 100), 1)

    # Column 1 is passed to fit() as durations, column 2 as event_observed.
    # NOTE(review): presumably the rows are ordered by expression level so that
    # the head is "low" and the tail is "high" - confirm against ReadData.
    durations = data[:, 1]
    events = data[:, 2]

    kmf = KaplanMeierFitter()
    kmf.fit(durations[0:low_count], events[0:low_count], label=gene + '/low')
    ax = kmf.plot()
    kmf.fit(durations[-high_count:], events[-high_count:], label=gene + '/high')
    kmf.plot(ax=ax)
    plt.savefig("static/test.png", bbox_inches='tight')
    plt.close()

    # `refresh` receives a fresh random number on every render.
    return render_template("single.html",
                           form=form,
                           image="test.png",
                           refresh=np.random.randn(),
                           static=static)

コード例 #7

0

ファイルを表示

def plotKM(genes):
    """Save one Kaplan-Meier plot per gene index for the 'ER' split.

    After ``extractSurvivalData()`` has run, ``data/survival_complete.txt`` is
    loaded as strings into a DataFrame. For each index 0..13 the frame is
    split by the 'ER' label, the first part is split again into high/low
    subsets, and column 2 of each subset is fitted (durations only, no event
    indicator) and plotted. Each figure is saved as
    ``images/kmplot_<genes[i]>`` and then cleared.
    """
    extractSurvivalData()
    raw = np.genfromtxt("data/survival_complete.txt", delimiter='\t', dtype=str)

    column_names = [
        'id', 'ER', 'PR', 'HER2', 'TN', 'GCH1', 'CDH1', 'CDH2', 'VIM',
        'bCatenin', 'ZEB1', 'ZEB2', 'TWIST1', 'SNAI1', 'SNAI2', 'RET',
        'NGFR', 'EGFR', 'AXL', 'STATUS', 'MONTHS',
    ]
    df = pd.DataFrame(raw, columns=column_names)

    kmf = KaplanMeierFitter()

    for gene_idx in range(14):
        # Split the complete data set by the 'ER' label; only the first
        # returned part is used below.
        er_first, _er_second = separateLabels(df, 'ER', gene_idx, 1)

        # Within that part, divide the data into high/low gene expression.
        high_part, low_part = separateHighandLow(df, genes, gene_idx, er_first.values)

        # KM plot: both curves on the same axes.
        kmf.fit(high_part[:, 2:3].astype(float), label='pos_high')
        ax = kmf.plot()
        kmf.fit(low_part[:, 2:3].astype(float), label='pos_low')
        kmf.plot(ax=ax)

        plt.savefig("images/kmplot_" + genes[gene_idx])
        plt.clf()

コード例 #8

0

ファイルを表示

def kaplan_meier_curve(
    data_df: Union[pd.DataFrame, str],
    task: str = "liver",
    threshold: Union[float, List] = 0.5,
    process_dir: str = None,
):
    """Plot Kaplan-Meier curves for bands of the ``task`` column.

    The rows are split into consecutive bands ``lower < data_df[task] <= upper``
    where the upper bounds are the given threshold(s) followed by 1 and the
    first lower bound is -1. Each band is fitted on its ``"duration"`` and
    ``"event"`` columns and drawn on one shared axes.

    Args:
        data_df: DataFrame, or path of a CSV file that is read with
            ``pd.read_csv``.
        task: name of the column the bands are computed on.
        threshold: a single number or a list of upper bounds. A list passed
            by the caller is not modified.
        process_dir: when given, the figure is saved there as
            ``<task>_kaplan_meier.pdf``.
    """
    if isinstance(data_df, str):
        data_df = pd.read_csv(data_df)

    if isinstance(threshold, (int, float)):
        upper_bounds = [threshold, 1]
    else:
        # Build a new list: appending to `threshold` itself would grow the
        # caller's list on every call.
        upper_bounds = [*threshold, 1]

    ax = plt.subplot(111)
    kmf = KaplanMeierFitter()
    lower = -1
    for upper in upper_bounds:
        name = f"{task}: {lower} < y <= {upper}"
        in_band = (data_df[task] > lower) & (data_df[task] <= upper)
        grouped_df = data_df[in_band]

        kmf.fit(grouped_df["duration"], grouped_df["event"], label=name)
        kmf.plot(ax=ax)
        lower = upper

    plt.xlabel("Follow-up time (days)")
    plt.ylabel("Probability of survival")

    if process_dir is not None:
        plt.tight_layout()
        plt.savefig(os.path.join(process_dir, f"{task}_kaplan_meier.pdf"))

コード例 #9

0

ファイルを表示

ファイル: graphSurvival.py プロジェクト: harithalakshmanan/Haritha-Research-Project

def graph(months, survival_status, has_mutation, name):
    """Show Kaplan-Meier curves for samples with and without a mutation.

    ``months`` and ``survival_status`` (0 if living, 1 if dead) become the
    duration and event columns. ``has_mutation`` is used as a mask to select
    the "have mutation" group and its inverse (``~has_mutation``) selects the
    "no mutation" group. Both curves, with confidence intervals, are drawn on
    one axes titled ``"<name> survival plot"``.
    """
    survival_data = pd.DataFrame({
        'OS_MONTHS': months,
        'OS_STATUS': survival_status,
    })
    durations = survival_data.OS_MONTHS
    statuses = survival_data.OS_STATUS

    fitter = KaplanMeierFitter()

    # First layer: samples carrying the mutation.
    fitter.fit(durations[has_mutation],
               statuses[has_mutation],
               label="have mutation")
    axes = fitter.plot(ci_show=True)

    # Second layer: the remaining samples, on the same axes.
    fitter.fit(durations[~has_mutation],
               statuses[~has_mutation],
               label="no mutation")
    fitter.plot(ax=axes, ci_show=True)

    plt.title('{} survival plot'.format(name))
    plt.show()

コード例 #10

0

ファイルを表示

def surv_curve_wg(value):
    """Show one Kaplan-Meier subplot per failure type of the entity ``value``.

    The time-to-failure table is built by ``transurv`` from the two CSV files
    under ``../Survival-analysis/datasets`` (relative to the current working
    directory) and filtered to rows whose ``'name'`` equals ``value``.
    ``'runhour_cum'`` is used as duration and ``'event'`` as event indicator.
    Every non-null ``'fail_type'`` gets its own subplot with a dashed red line
    at survival probability 0.5.
    """
    from scripts.transform_dataset import transurv

    datasets_dir = join(os.path.dirname(os.getcwd()), 'Survival-analysis',
                        'datasets')
    history = join(datasets_dir, 'filling_event.csv')
    machine = join(datasets_dir, 'machine.csv')
    ttf = transurv(hist_url=history, mach_url=machine)

    ttf_ad = ttf[ttf['name'] == value]
    T = ttf_ad['runhour_cum']
    E = ttf_ad['event']

    # Null failure types can never match `== c` below, so they are dropped.
    fail_names = ttf_ad['fail_type'].dropna().unique()

    kmf = KaplanMeierFitter()
    fig = plt.figure(figsize=(15, 20))

    # Keep the 5x3 grid, but add rows when there are more than 15 types.
    n_rows = max(5, -(-len(fail_names) // 3))

    # Subplot numbers start at 1; enumerate covers every failure type
    # (range(1, n) stopped one short and skipped the last one).
    for num, c in enumerate(fail_names, start=1):
        ix = ttf_ad['fail_type'] == c
        ax = fig.add_subplot(n_rows, 3, num)
        kmf.fit(T[ix], E[ix], label=c)
        kmf.plot(ax=ax, legend=False)
        ax.set_title(c)
        ax.set_xlabel('runhour')
        ax.axhline(y=0.5, color='r', linestyle='dashed')

    plt.tight_layout()
    plt.show()

コード例 #11

0

ファイルを表示

ファイル: survival_analysis.py プロジェクト: Identity-lab/Addicts_in_recovery-Survival_analysis

def KM_estimator(relapsed_data, censored_data):
    """Plot a single Kaplan-Meier curve and save it as ``km.pdf``.

    Durations are ``relapsed_data + censored_data``; the event indicator is 1
    for every relapsed entry followed by 0 for every censored entry.
    NOTE(review): ``+`` concatenates only if both inputs are lists - confirm
    what callers pass.
    """
    durations = relapsed_data + censored_data
    relapse_flags = list(np.ones(len(relapsed_data)))
    censor_flags = list(np.zeros(len(censored_data)))
    event_observed = relapse_flags + censor_flags

    ax = plt.subplot(111)
    fitter = KaplanMeierFitter()
    fitter.fit(durations, event_observed, label='kaplan-meier curve')

    # Fix the axis limits and the axes position on the current axes.
    current_axes = plt.gca()
    current_axes.set_ylim([0, 1])
    current_axes.set_xlim([0, 86])
    current_axes.set_position([0.16, 0.175, 0.81, 0.8])

    fitter.plot(show_censors=False, censor_styles={'ms': 3, 'marker': 's'}, ci_show=True, at_risk_counts=False)
    plt.xlabel('Time in Months', labelpad=10, fontsize=20)
    plt.ylabel('Survival Probability', labelpad=10, fontsize=20)

    # Same tick label size on both axes (x first, then y).
    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(15)

    plt.savefig('km.pdf')
    plt.show()

コード例 #12

0

ファイルを表示

    def survival_plot_and_cox(self, df_arr, label=[], filename=''):
        """Plot one Kaplan-Meier curve per data frame and save PNG and PDF files.

        Each element of ``df_arr`` is fitted on its ``'bcrmonth'`` and
        ``'bcrstatus'`` columns, labelled with the matching entry of
        ``label`` and coloured from a fixed six-colour list. All curves share
        the axes created by the first plot. The figure is written to
        ``filename + '.png'`` and ``filename + '.pdf'``.
        """
        plt.clf()
        palette = ['red', 'green', 'blue', 'cyan', 'orange', 'black']

        kmf = KaplanMeierFitter()
        naf = NelsonAalenFitter()  # created but not used below

        for position, frame in enumerate(df_arr):
            kmf.fit(frame['bcrmonth'], frame['bcrstatus'], label=label[position])
            plot_options = dict(show_censors=True,
                                ci_show=False,
                                color=palette[position],
                                ylim=(0, 1))
            if position == 0:
                # The first curve creates the axes the later ones reuse.
                ax = kmf.plot(**plot_options)
            else:
                kmf.plot(ax=ax, **plot_options)

        figure = ax.get_figure()
        figure.savefig(filename + '.png')
        figure.savefig(filename + '.pdf', format='PDF')

コード例 #13

0

ファイルを表示

ファイル: test_plotting.py プロジェクト: vtaquet/lifelines

    def test_kmf_add_at_risk_counts_with_single_row_multi_groups(
            self, block, kmf):
        """Plot two random groups and add only the "At risk" count row."""
        # Group "test": 100 samples.
        test_durations = np.random.exponential(10, size=(100))
        test_events = np.random.binomial(1, 0.8, size=(100))
        kmf_test = KaplanMeierFitter().fit(test_durations, test_events, label="test")

        # Group "con": 1000 samples.
        con_durations = np.random.exponential(15, size=(1000))
        con_events = np.random.binomial(1, 0.6, size=(1000))
        kmf_con = KaplanMeierFitter().fit(con_durations, con_events, label="con")

        figure = self.plt.figure()
        ax = figure.subplots(1, 1)

        for fitter in (kmf_test, kmf_con):
            fitter.plot(ax=ax)

        ax.set_ylim([0.0, 1.1])
        ax.set_xlim([0.0, 100])
        ax.set_xlabel("Days")
        ax.set_ylabel("Survival probability")

        add_at_risk_counts(kmf_test,
                           kmf_con,
                           ax=ax,
                           rows_to_show=["At risk"],
                           ypos=-0.4)
        self.plt.title(
            "test_kmf_add_at_risk_counts_with_single_row_multi_groups")
        self.plt.tight_layout()
        self.plt.show(block=block)

コード例 #14

0

ファイルを表示

ファイル: viz.py プロジェクト: jaredleekatzman/DeepSurv

def plot_survival_curves(rec_t, rec_e, antirec_t, antirec_e, experiment_name = '', output_file = None):
    """Plot recommendation vs. anti-recommendation Kaplan-Meier curves.

    Args:
        rec_t, rec_e: durations and event indicators of the recommendation
            group.
        antirec_t, antirec_e: durations and event indicators of the
            anti-recommendation group.
        experiment_name: prefix joined to both curve labels with a space.
        output_file: when truthy, the figure is saved to this path.

    The log-rank test summary is printed and its p-value is written onto the
    plot (as ``p < 1e-5`` when it is below 1e-5).
    """
    # Set up the plot.
    plt.figure(figsize=(12,3))
    ax = plt.subplot(111)

    # Fit and draw both survival curves on the same axes.
    kmf = KaplanMeierFitter()
    kmf.fit(rec_t, event_observed=rec_e, label=' '.join([experiment_name, "Recommendation"]))
    kmf.plot(ax=ax,linestyle="-")
    kmf.fit(antirec_t, event_observed=antirec_e, label=' '.join([experiment_name, "Anti-Recommendation"]))
    kmf.plot(ax=ax,linestyle="--")

    # Format the graph.
    plt.ylim(0,1)
    ax.set_xlabel('Timeline (months)',fontsize='large')
    ax.set_ylabel('Percentage of Population Alive',fontsize='large')

    # Calculate the p-value.
    results = logrank_test(rec_t, antirec_t, rec_e, antirec_e, alpha=.95)
    results.print_summary()

    # Place the label one ninth of the way along the longest duration.
    xloc = max(np.max(rec_t),np.max(antirec_t)) / 9
    if results.p_value < 1e-5:
        # Raw string: "\m" is not a valid escape sequence in a normal literal.
        ax.text(xloc,.2,r'$p < 1\mathrm{e}{-5}$',fontsize=20)
    else:
        ax.text(xloc,.2,'$p=%f$' % results.p_value,fontsize=20)
    plt.legend(loc='best',prop={'size':15})

    if output_file:
        plt.tight_layout()
        pylab.savefig(output_file)

コード例 #15

0

ファイルを表示

ファイル: test_plotting.py プロジェクト: rserran/lifelines

    def test_at_risk_looks_right_when_scales_are_magnitudes_of_order_larger_single_attribute(self, block):
        """Plot five fitters whose durations differ by orders of magnitude."""

        def right_edges(upper):
            # Right edge of the bin (out of 100) that each value of
            # arange(upper) falls into.
            bins = pd.cut(np.arange(upper), 100, retbins=False)
            return [interval.right for interval in bins]

        specs = [
            (32000, "Category A"),
            (9000, "Category"),
            (900, "CatB"),
            (90, "Categ"),
            (9, "Categowdary B"),
        ]
        fitters = [
            KaplanMeierFitter().fit(right_edges(upper), label=name)
            for upper, name in specs
        ]

        # The first plot creates the axes; the others are layered onto it.
        ax = fitters[0].plot()
        for fitter in fitters[1:]:
            ax = fitter.plot(ax=ax)

        add_at_risk_counts(*fitters, ax=ax, rows_to_show=["At risk"])

        self.plt.title("test_at_risk_looks_right_when_scales_are_magnitudes_of_order_larger")
        self.plt.tight_layout()
        self.plt.show(block=block)

コード例 #16

0

ファイルを表示

def KaplanMeier_dash(T, C):
    """Fit a Kaplan-Meier curve on durations ``T`` / events ``C`` and hand the figure to ``pyplot``.

    The curve is drawn twice, the second time with ``ci_force_lines=True``,
    and the current matplotlib figure is then passed to ``pyplot`` with
    ``legend=False``.
    NOTE(review): ``pyplot`` is defined outside this block - presumably a
    figure converter/renderer; confirm.
    """
    plot_title = 'Kaplan Meier fitter'
    fitter = KaplanMeierFitter()
    fitter.fit(T, event_observed=C)
    fitter.plot(title=plot_title)
    fitter.plot(ci_force_lines=True, title=plot_title)
    current_figure = plt.gcf()
    pyplot(current_figure, legend=False)

コード例 #17

0

ファイルを表示

ファイル: demo_KMplot.py プロジェクト: snjsnj/4Dsurvival

def main():
    """Draw Kaplan-Meier curves for low/high risk groups from a pickled result file.

    The pickle holds, in order, the original outcomes, the per-bootstrap
    predictions and the in-bag indices. Predictions are averaged over the
    bootstraps where the in-bag mask is 0, and subjects are split at the
    median of that average into a low (0) and a high (1) risk group.
    """
    args = parse_args()
    data_dir = DATA_DIR if args.data_dir is None else Path(args.data_dir)

    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    with open(str(data_dir.joinpath(args.file_name)), 'rb') as f:
        payload = pickle.load(f)
    y_orig = payload[0]
    preds_bootfull = payload[1]
    inds_inbag = payload[2]
    del payload

    preds_bootfull_mat = np.concatenate(preds_bootfull, axis=1)
    inds_inbag_mat = np.array(inds_inbag).T

    # inbag_mask[i, b] is 1 when the value i occurs anywhere in column b of
    # inds_inbag_mat, otherwise 0.
    n_rows = inds_inbag_mat.shape[0]
    inbag_mask = 1*np.array([np.any(inds_inbag_mat==row, axis=0) for row in range(n_rows)])

    # Average each row's predictions over the columns where the mask is 0.
    oob_mask = 1-inbag_mask
    oob_sum = np.sum(np.multiply(oob_mask, preds_bootfull_mat), axis=1)
    oob_count = np.sum(oob_mask, axis=1)
    preds_bootave_oob = np.divide(oob_sum, oob_count)

    # 1 = prediction above the median, 0 = otherwise.
    risk_groups = 1*(preds_bootave_oob > np.median(preds_bootave_oob))

    # The frame is indexed by the risk group as a string so that each group
    # can be selected with .loc['0'] / .loc['1'].
    table = np.concatenate(
        (y_orig, preds_bootave_oob[:, np.newaxis], risk_groups[:, np.newaxis]),
        axis=-1,
    )
    wdf = pd.DataFrame(
        table,
        columns=['status', 'time', 'preds', 'risk_groups'],
        index=[str(group) for group in risk_groups],
    )

    kmf = KaplanMeierFitter()
    ax = plt.subplot(111)
    for group_key, group_label in (('0', "Low Risk"), ('1', "High Risk")):
        kmf.fit(durations=wdf.loc[group_key,'time'], event_observed=wdf.loc[group_key,'status'], label=group_label)
        ax = kmf.plot(ax=ax)
    plt.ylim(0,1)
    plt.title("Kaplan-Meier Plots")
    plt.xlabel('Time (days)')
    plt.ylabel('Survival Probability')

コード例 #18

0

ファイルを表示

def subsetsImpactSurvival(subsets,
                          metadata,
                          metacensorcol="overall_survival",
                          metaDFDcol="death_from_disease",
                          plot=False,
                          title=None,
                          rounding=2):
    """Fit and plot a Kaplan-Meier curve for every subset of ``metadata``.

    subsets is a dictionary mapping a label to a boolean mask over
    ``metadata``,
    e.g.: subsets={'cluster {}'.format(i):metadata.index.isin(fitrue.columns[kmeans.labels_==i]) for i in range(4)}

    For each subset ``metadata[metacensorcol]`` is used as durations and
    ``metadata[metaDFDcol]`` as event indicator. ``plot`` and ``rounding`` are
    accepted but not used.

    Returns:
        ``(lastvalues, ax)`` where ``lastvalues`` maps each label to
        ``(subset size, last valid value of the survival function)`` and
        ``ax`` is the shared axes, or ``None`` when ``subsets`` is empty.
    """
    kmf = KaplanMeierFitter()

    lastvalues = {}
    ax = None  # created by the first plot, reused by the following ones
    for subset in subsets:
        mask = subsets[subset]
        kmf.fit(metadata[metacensorcol][mask],
                metadata[metaDFDcol][mask],
                label=subset)
        survival = kmf.survival_function_
        final_row = survival.loc[survival.last_valid_index()]
        # Pull the scalar out before converting: float() on a one-element
        # Series is deprecated in pandas.
        lastvalues[subset] = (sum(mask), float(final_row.iloc[0]))
        if ax is None:
            ax = kmf.plot()
        else:
            kmf.plot(ax=ax)

    if ax is None:
        # Nothing was plotted, so there is no axes to decorate.
        return lastvalues, None

    if title:
        ax.set_title(title)
    ax.set_ylim((0, 1))
    return lastvalues, ax

コード例 #19

0

ファイルを表示

def plot_Kaplan_Meier_feature(donor_dataset):
    '''Plot Kaplan-Meier curves for every feature column, split at the feature mean.

    For each column other than 'Total_years', 'censored' and 'Baseline', the
    donors are divided into those whose value is above the mean of that
    column (the mean is taken over rows with ``censored == 0``) and all
    others. Both halves are fitted and drawn in a new 5x5 inch figure, which
    is then shown.

    Parameters:
    donor_dataset: Pandas dataframe containing the columns 'Total_years',
    'censored' and 'Baseline' (a missing one raises ValueError).
    'Total_years' represents how many years the donors have been active and
    is used as the duration. 'censored' is passed to the fitter as the event
    indicator.
    NOTE(review): the earlier documentation described 'censored' as
    "True = active donor"; confirm that this matches its use as the
    event-observed flag.

    This function does not return anything.
    '''
    T = donor_dataset['Total_years']
    C = donor_dataset['censored']

    features = list(donor_dataset.columns)
    for reserved in ('Total_years', 'censored', 'Baseline'):
        features.remove(reserved)

    for feature in features:
        reference_mean = donor_dataset[donor_dataset['censored'] == 0][feature].mean()
        above_mean = donor_dataset[feature] > reference_mean

        fig = plt.figure(figsize=(5, 5))
        ax = fig.add_subplot(111)
        kmf = KaplanMeierFitter()

        halves = (
            (above_mean, feature + ': Yes or > mean'),
            (~above_mean, feature + ': No or < mean'),
        )
        for selector, caption in halves:
            kmf.fit(T[selector], C[selector], label = caption)
            kmf.plot(ax=ax, linewidth = 2)

        ax.set_xlabel('Years', size = 10)
        ax.set_ylabel('Surviving donor population', size = 10)
        ax.set_xlim(0,40)
        ax.set_ylim(0, 1)
        ax.grid()
        ax.legend(loc = 'upper right', fontsize = 10)
        plt.show()

コード例 #20

0

ファイルを表示

ファイル: test_plotting.py プロジェクト: rserran/lifelines

 def test_kmf_with_interval_censoring_plotting(self, block):
     """Fit the diabetes data with interval censoring and plot the curve in red."""
     fitter = KaplanMeierFitter()
     left = load_diabetes()["left"]
     right = load_diabetes()["right"]
     fitter.fit_interval_censoring(left, right)
     fitter.plot(color="r")
     self.plt.show(block=block)

コード例 #21

0

ファイルを表示

def kmplot(df_high, df_low, ax):
    """Plot high/low Kaplan-Meier curves on ``ax`` and report the log-rank result.

    Both frames need ``duration`` and ``event`` columns. If fitting raises
    ``ValueError`` the tuple ``("NA", "0", "0", "0", "0")`` is returned and
    nothing is drawn.

    Returns:
        ``(p_value, hm5, hm10, lm5, lm10)``: the log-rank p-value followed by
        the predicted survival of the high group at 60 and 120 and of the low
        group at 60 and 120.
    """
    kmf_high = KaplanMeierFitter()
    kmf_low = KaplanMeierFitter()
    try:
        kmf_high.fit(durations=df_high.duration,
                     event_observed=df_high.event,
                     label='High: n = ' + str(len(df_high)))
        kmf_low.fit(durations=df_low.duration,
                    event_observed=df_low.event,
                    label="Low: n = " + str(len(df_low)))
    except ValueError:
        return ("NA", "0", "0", "0", "0")

    # High group in red, low group in black, no confidence bands.
    for fitter, colour in ((kmf_high, "red"), (kmf_low, "black")):
        fitter.plot(ax=ax, color=colour, show_censors=True, ci_show=False)

    p_value = logrank_test(df_high.duration,
                           df_low.duration,
                           event_observed_A=df_high.event,
                           event_observed_B=df_low.event).p_value

    ax.set_xlabel('Time (months)')
    ax.set_ylabel('Probability')
    # The p-value goes into the bottom-right corner, in axes coordinates.
    ax.text(0.95,
            0.02,
            'logrank P = ' + str('%.4f' % p_value),
            verticalalignment='bottom',
            horizontalalignment='right',
            transform=ax.transAxes,
            color='black',
            fontsize=11)
    plt.legend(loc=3)

    # Order: high@60, high@120, low@60, low@120.
    predictions = tuple(
        fitter.predict(month)
        for fitter in (kmf_high, kmf_low)
        for month in (60, 120)
    )
    return (p_value,) + predictions

コード例 #22

0

ファイルを表示

ファイル: kme.py プロジェクト: shmalex/pyjs

def plot_two_groups(data, t_col_name, e_col_name, g_name, alpha):
    '''
    Render the Kaplan-Meier curves of two groups and compute the p-value.

    data: frame holding the duration column ``t_col_name``, the event column
    ``e_col_name`` and the group column ``g_name``.
    alpha: passed to the log-rank test and shown in the title.

    The first two unique values of the group column name the groups: rows
    equal to the first value form one group and all remaining rows the other.
    The plot title shows the log-rank p-value and ``alpha``.
    '''
    T = data[t_col_name]
    E = data[e_col_name]

    # Take the groups from the same frame as T and E so that the mask lines up
    # with them (no dependence on a module-level `df`).
    groups = data[g_name]

    # Unique group values give the names of the 1st and 2nd group.
    uniques = groups.unique()

    ix = (groups == uniques[0])

    kmf = KaplanMeierFitter()
    # Plot the rows that are not in the first group, labelled with the
    # second unique value.
    kmf.fit(T[~ix], E[~ix], label=uniques[1])
    ax = kmf.plot()

    # Plot the first group on the same axes.
    kmf.fit(T[ix], E[ix], label=uniques[0])
    kmf.plot(ax=ax)

    # Log-rank test for the p-value.
    results = logrank_test(T[ix], T[~ix], E[ix], E[~ix], alpha=alpha)
    plt.title('p-value: {0:.4f}, alpha: {1:.2f}'.format(
        results.p_value, alpha))

コード例 #23

0

ファイルを表示

ファイル: survival_analysis.py プロジェクト: gowun/MedicalDataScience

def do_KM_analysis(durations, groups, events, group_labels, xlabel=None):
    """Plot Kaplan-Meier curves per group with at-risk counts and log-rank tests.

    The distinct values of ``groups`` are processed in sorted order and the
    i-th one is labelled ``group_labels[i]``. All curves share one axes. The
    multivariate log-rank p-value is written onto the plot; with more than two
    groups the pairwise log-rank summary is printed as well.

    Returns the fitter of the last group.
    """
    fitters = []
    sns.set(palette = "colorblind", font_scale = 1.35, rc = {"figure.figsize": (8, 6), "axes.facecolor": ".92"})

    shared_ax = None
    for i, cl in enumerate(sorted(set(groups))):
        kmf = KaplanMeierFitter()
        members = groups == cl
        kmf.fit(durations[members], events[members], label=group_labels[i])
        fitters.append(kmf)
        if shared_ax is None:
            # The first curve creates the axes every later curve is drawn on.
            shared_ax = kmf.plot(ci_show=False)
        else:
            kmf.plot(ax=shared_ax, ci_show=False)

    add_at_risk_counts(*fitters, labels=group_labels)
    shared_ax.set_ylim(0,1.1)
    if xlabel is not None:
        shared_ax.set_xlabel(xlabel)

    multi = multivariate_logrank_test(durations, groups, events)
    shared_ax.text(0.1, 0.01, 'P-value=%.3f'% multi.p_value)

    if len(set(groups)) > 2:
        pair = pairwise_logrank_test(durations, groups, events)
        pair.print_summary()

    plt.show()

    return kmf

コード例 #24

0

ファイルを表示

ファイル: km_curve.py プロジェクト: hag007/nn_sb

def km_curve(labels_ids, survival_dataset, tested_gene_expression_headers_columns, gene_group , k=None, label_index=None):
    """Plot one Kaplan-Meier curve per cluster and collect log-rank p-values.

    ``labels_ids`` is a sequence of clusters, each a collection of sample ids.
    In ``survival_dataset`` column 0 holds the sample id, column 3 is read as
    duration and column 4 as event flag (both cast to int32). Only samples
    that also occur in ``tested_gene_expression_headers_columns`` are used.

    Each cluster is compared with all other tested samples by a log-rank
    test. Non-empty clusters are plotted, their p-value is printed and
    collected, and they are additionally compared with every earlier cluster.
    The figure is saved under ``constants.BASE_PROFILE/output``; ``gene_group``,
    ``k`` and ``label_index`` are only used in the file name.

    Returns the list of cluster-vs-rest p-values of the non-empty clusters.
    """
    ax = plt.subplot(111)

    kmf = KaplanMeierFitter()
    all_labels = np.array([y for x in labels_ids for y in x])
    label_event_list = []
    label_duration_list = []
    results = []
    for i, cur_labels in enumerate(labels_ids):
        # Rows of the current cluster that were also tested (computed once
        # and reused for both columns).
        in_cluster = np.in1d(survival_dataset[:, 0], cur_labels) & np.in1d(survival_dataset[:, 0], tested_gene_expression_headers_columns)
        label_event = survival_dataset[in_cluster, 4].astype(np.int32)
        label_duration = survival_dataset[in_cluster, 3].astype(np.int32)
        label_event_list.append(label_event)
        label_duration_list.append(label_duration)

        # Complement: every tested label that is not in the current cluster.
        labels_c = all_labels[~np.in1d(all_labels,cur_labels) & np.in1d(all_labels, tested_gene_expression_headers_columns)]
        in_complement = np.in1d(survival_dataset[:, 0], labels_c)
        label_event_c = survival_dataset[in_complement, 4].astype(np.int32)
        label_duration_c = survival_dataset[in_complement, 3].astype(np.int32)

        lr_results = logrank_test(label_duration, label_duration_c, label_event, label_event_c, alpha=.95)
        if len(label_duration) != 0:
            kmf.fit(list(label_duration), event_observed=list(label_event), label="cluster {} n={}, logrank pval = {}".format(i,len(label_duration), '{0:1.3e}'.format(lr_results.p_value)))
            kmf.plot(ax=ax, show_censors=True)
            # print(...) with one argument works on Python 2 and Python 3.
            print("lrank cluster {} vs all: {}".format(i, lr_results.p_value))
            results.append(lr_results.p_value)
            # Pairwise comparison with every earlier cluster (the last list
            # entry is the current cluster itself).
            earlier = zip(label_duration_list[:-1], label_event_list[:-1])
            for j, (prev_duration, prev_event) in enumerate(earlier):
                lr_results = logrank_test(label_duration, prev_duration, label_event, prev_event, alpha=.95)
                print("lrank cluster {} vs cluster {}: {}".format(i, j, lr_results.p_value))
    plt.ylim(0, 1)

    plt.title("clustering survival analysis")
    plt.savefig(os.path.join(constants.BASE_PROFILE,"output" ,"cluster_by_p_{}_{}_k={}_label_i={}_{}.png".format(constants.CANCER_TYPE, gene_group.split("/")[-1],k,label_index , time.time())))
    plt.cla()

    return results

コード例 #25

0

ファイルを表示

def test_kmf_minimum_observation_bias():
    """Plot a fit of 250 generated samples whose entry time is 1% of the duration."""
    n_samples = 250
    fitter = KaplanMeierFitter()
    durations, observed = exponential_survival_data(n_samples, 0.1, scale=10)
    entry_times = 0.01 * durations
    fitter.fit(durations, observed, entry=entry_times)
    fitter.plot()
    plt.title("Should have larger variances in the tails")

コード例 #26

0

ファイルを表示

def createSurvivalGraph(durations, event_observed):
    """Show the Kaplan-Meier survival curve, without confidence interval, for the given data."""
    fitter = KaplanMeierFitter()
    fitter.fit(durations, event_observed=event_observed)
    fitter.plot(ci_show=False)

    plt.title("Hard Drive Kaplan Meier Survival Analysis")
    plt.ylabel("Probability a Hard Drive Survives")
    plt.show()

コード例 #27

0

ファイルを表示

ファイル: kme.py プロジェクト: shmalex/pyjs

def plot_one_groupd(data, t_col_name, e_col_name, label):
    '''
    Plot the Kaplan-Meier curve for one group with the given label.

    ``data[t_col_name]`` holds the durations and ``data[e_col_name]`` the
    event indicator.
    '''
    durations = data[t_col_name]
    observed = data[e_col_name]
    fitter = KaplanMeierFitter()
    fitter.fit(durations, event_observed=observed, label=label)
    fitter.plot()

コード例 #28

0

ファイルを表示

def survival_plot(df, grouping_col, time_col, event_col):
    """Show one Kaplan-Meier curve per value of ``grouping_col`` on a shared axes.

    ``time_col`` supplies the durations and ``event_col`` the event indicator.
    """
    group_index = df.groupby(grouping_col).groups
    _, ax = plt.subplots()
    for name, rows in group_index.items():
        fitter = KaplanMeierFitter()
        durations = df.loc[rows, time_col]
        observed = df.loc[rows, event_col]
        fitter.fit(durations, observed, label=name)
        fitter.plot(ax=ax)
    sns.despine()
    plt.show()

コード例 #29

0

ファイルを表示

ファイル: CPH.py プロジェクト: 2bora/Cancer_conquest

def makeKMplot(cph_input, cph_group, NN_method):
    """Save Kaplan-Meier plots of the predicted groups for every trial and method.

    ``cph_input`` and ``cph_group`` are read in parallel. For each trial and
    each entry of ``NN_method`` a test frame is built with the columns 'S',
    'E', the feature columns and 'group', where 'group' is 'd' when the
    method's group flag is 0 and 's' when it is 1. If more than one row is in
    group 'd', the curves of both groups are plotted and saved under
    ``../data/KMplot/``.
    """
    for k, inputs in enumerate(cph_input):
        print(k)
        (iter_num, followup, method, feature_list, x_trn, y_trn, s_trn, c_trn,
         x_tst, y_tst, s_tst, c_tst) = inputs
        cph_head = ['S', 'E'] + list(feature_list) + ['group']

        # The trial's group record overrides iter_num / followup / method.
        (iter_num, followup, method, score_slp, divide_group_slp, score_mlp,
         divide_group_mlp) = cph_group[k]

        for method in NN_method:
            # Build the test rows: S, E, features..., group marker.
            cph_data_test = []
            for i in range(len(x_tst)):
                row = [s_tst[i], c_tst[i]]
                row.extend(x_tst[i][j] for j in range(len(feature_list)))

                if method == 'SLP':
                    group_flags = divide_group_slp
                elif method == 'MLP':
                    group_flags = divide_group_mlp
                else:
                    group_flags = None

                if group_flags is not None:
                    if group_flags[i] == 0:
                        row.append('d')
                    elif group_flags[i] == 1:
                        row.append('s')
                cph_data_test.append(row)

            cph_df_test = pd.DataFrame(cph_data_test, columns=cph_head)

            kmf = KaplanMeierFitter()
            # Only plot when more than one row is in group 'd'.
            if len(cph_df_test.loc[cph_df_test.group == 'd']) <= 1:
                continue

            print('a')
            groups = cph_df_test["group"]
            T = cph_df_test["S"]
            E = cph_df_test["E"]
            is_death = (groups == 'd')
            kmf.fit(T[~is_death], E[~is_death], label='survival')
            ax = kmf.plot()
            kmf.fit(T[is_death], E[is_death], label='death')
            kmf.plot(ax=ax)

            caption = (str(iter_num) + 'th trial of ' + str(followup) +
                       'year survival with ' + method)
            plt.title(caption)
            plt.savefig('../data/KMplot/' + caption + '.png')

コード例 #30

0

ファイルを表示

def plot_Consensus_top_10(big_board_df, melt_df, save=False):
    """
    Plots the survival curve for the Consensus top-10 players, either displaying
    the output or saving to the `../plots` directory.

    One Kaplan-Meier model is fitted per player (the first ten entries of
    ``big_board_df['player']``) and drawn on its own subplot.

    Args:
        big_board_df (pandas DataFrame): the wide-form big board dataframe with
                                        player names and draft slots
        melt_df (pandas DataFrame): long-form big board dataframe with
                                    `player`, `duration` and `observed` columns
        save (boolean): Boolean of whether to write out the .png file in the
                        `../plots` directory or display the image

    Returns:
        None
    """
    # consensus Top-10 Picks (By Average Ranking)
    top_10 = big_board_df['player'].to_list()[0:10]
    # create matplotlib figure with 10 subplots
    fig, axs = plt.subplots(nrows=10,
                            ncols=1,
                            sharey=True,
                            sharex=False,
                            figsize=(15, 32))
    # tick positions 0..14 are labelled as draft slots 1..15
    x_ticks = range(0, 15)
    # loop through top 10 players plotting each to their respective subplot
    for player, ax in zip(top_10, axs.flatten()):
        # slice to individual player
        idx = melt_df.player == player
        # fit Kaplan-Meier survival model
        kmf = KaplanMeierFitter()
        kmf.fit(melt_df.duration[idx], melt_df.observed[idx])
        # plot individual player's survival curve
        kmf.plot(ax=ax, legend=False)
        # format xticks, etc.
        ax.set(title=player, xlabel='', xlim=(0, 14), ylim=(-0.1, 1.1))
        y_vals = ax.get_yticks()
        ax.set_yticklabels(['{:3.0f}%'.format(x * 100) for x in y_vals])
        ax.set_xticks(x_ticks)
        # One label per tick: a label list shorter than the tick list is
        # rejected by current matplotlib releases.
        ax.set_xticklabels(['{0}'.format(slot + 1) for slot in x_ticks])
    # set title, axes, etc.
    fig.text(0.5, 0.001, "Draft Slot", ha="center", fontsize=18)
    fig.text(0.001,
             0.5,
             "Probability Player is Still Available",
             va="center",
             rotation="vertical",
             fontsize=18)
    fig.suptitle("Survival Curve for Consensus Top-10 Picks", fontsize=35)
    fig.tight_layout()
    fig.subplots_adjust(top=0.95)
    # either save figure or display
    if save:
        plt.savefig('../plots/top_10.png')
    else:
        plt.show()

コード例 #31

0

ファイルを表示

ファイル: pyNBS_edits.py プロジェクト: MattWallScientist/miner3

def cluster_kmplot(cluster_assign, surv, lr_test=True, verbose=False, tmax=-1):
    """Draw one Kaplan-Meier curve per cluster on a single new figure.

    Args:
        cluster_assign: pandas Series mapping patient id (index) to cluster label.
        surv: pandas DataFrame indexed by patient id with ``duration`` and
            ``observed`` columns.
        lr_test: when True, run a multivariate log-rank test across clusters and
            append the p-value to the plot title.
        verbose: when True (and ``lr_test`` is True), also print the p-value.
        tmax: upper x-axis limit; ``-1`` leaves the axis unrestricted. The value
            is also forwarded as ``t_0`` to the log-rank test.

    Returns:
        None. The plot is left on the current pyplot figure.
    """
    import seaborn as sns
    from lifelines import KaplanMeierFitter
    from lifelines.statistics import multivariate_logrank_test as multiv_lr_test
    import matplotlib.pyplot as plt

    # Initialize KM plotter
    kmf = KaplanMeierFitter()

    # Number of clusters
    clusters = sorted(cluster_assign.value_counts().index)
    k = len(clusters)

    # Set title
    title = "Survival plot k = " + str(k)

    # Initialize KM Plot Settings
    plt.figure(figsize=(10, 7))
    ax = plt.subplot(1, 1, 1)
    colors = sns.color_palette('hls', k)
    cluster_cmap = dict(zip(clusters, colors))

    for clust in clusters:
        clust_pats = cluster_assign[cluster_assign == clust].index
        # Restrict to patients that actually have survival data: label-based
        # selection with missing labels raises KeyError on current pandas.
        known_pats = surv.index.intersection(clust_pats)
        if len(known_pats) < 2:
            continue
        clust_surv_data = surv.loc[known_pats, :].dropna()
        kmf.fit(clust_surv_data.duration,
                clust_surv_data.observed,
                label='Group ' + str(clust) + ' (n=' +
                str(len(clust_surv_data)) + ')')
        kmf.plot(ax=ax, color=cluster_cmap[clust], ci_show=False)

    if tmax != -1:
        plt.xlim((0, tmax))
    plt.xlabel('Time (Days)', fontsize=16)
    plt.ylabel('Survival Probability', fontsize=16)
    # Property names are lower-case; the capitalised spelling is rejected by
    # current matplotlib releases.
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    # Multivariate logrank test
    if lr_test:
        cluster_survivals = pd.concat([surv, cluster_assign],
                                      axis=1).dropna().astype(int)
        p = multiv_lr_test(np.array(cluster_survivals.duration),
                           np.array(cluster_survivals[cluster_assign.name]),
                           t_0=tmax,
                           event_observed=np.array(
                               cluster_survivals.observed)).p_value
        if verbose:
            print('Multi-Class Log-Rank P:', p)
        plt.title(title + '\np=' + repr(round(p, 4)), fontsize=20, y=1.02)
    else:
        plt.title(title, fontsize=20, y=1.02)

    return

コード例 #32

0

ファイルを表示

def km(request, cancer, site, cut):
    """Render a Kaplan-Meier comparison of the highest vs. lowest `level` samples.

    Tumor samples for ``cancer`` at ``site`` are ranked by ``level``; the top and
    bottom ``cut`` fraction form the 'high group' and 'low group'. The plot is
    written to a fixed PNG path and the ``km.html`` template is rendered with it.

    Args:
        request: the incoming HTTP request, passed through to ``render``.
        cancer: value matched against ``sample__cancer_type``.
        site: value matched against ``site`` and used as the ``Site`` key.
        cut: fraction of samples per group; anything ``float()`` accepts.

    Returns:
        The response produced by ``render``.
    """
    from pandas import DataFrame as Df
    import matplotlib.pyplot as plt

    levels = ll.objects.filter(sample__cancer_type=cancer,
                               sample__is_tumor=True,
                               site=site)
    ingroup = int(round(float(cut) * levels.count()))
    cohorts = (
        (levels.order_by('-level')[:ingroup], 'h'),
        (levels.order_by('level')[:ingroup], 'l'),
    )
    days = []
    death = []
    group = []
    for queryset, tag in cohorts:
        for dead, alive in queryset.values_list('sample__days_to_death',
                                                'sample__days_to_last_followup'):
            if dead == -1:
                # No death recorded: use the last follow-up as a censored
                # duration, unless that is the '--' placeholder too.
                if alive != '--':
                    days.append(int(alive))
                    death.append(False)
                    group.append(tag)
            else:
                days.append(dead)
                death.append(True)
                group.append(tag)

    qq = Df([days, death, group]).T
    days = qq[0]
    death = qq[1]
    groups = qq[2]
    ix = (groups == 'l')
    pic_path = '/var/www/rnaedit/static/img/km/p1.png'

    # Draw on a dedicated figure and always close it: pyplot keeps every open
    # figure alive for the lifetime of the process, so an unclosed figure per
    # request leaks memory and may be reused by later plotting calls.
    fig, ax = plt.subplots()
    try:
        kmf = KaplanMeierFitter()
        kmf.fit(days[~ix], death[~ix], label='high group')
        kmf.plot(ax=ax)
        kmf.fit(days[ix], death[ix], label='low group')
        kmf.plot(ax=ax)
        fig.savefig(pic_path)
    finally:
        plt.close(fig)

    s1 = Site.objects.get(key=site)
    mt = {'cut': cut, 'site': s1, 'ig': ingroup, 'cancer': cancer}

    return render(request, "km.html", {
        'pic': pic_path.split('static/')[1],
        'meta': mt
    })

コード例 #33

0

ファイルを表示

def plot_kaplan_meier(kmf, cancer_type_list):
    """Fit and plot one Kaplan-Meier curve per entry of ``cancer_type_list``.

    Rows are taken from the module-level ``data`` frame by matching its
    ``project`` column. Note that the ``kmf`` argument is not used: it is
    replaced by a freshly constructed fitter before anything else happens.
    """
    kmf = KaplanMeierFitter()
    for cancer_type in cancer_type_list:
        print(cancer_type)
        subset = data.loc[data["project"] == cancer_type]
        print(subset)
        # Missing days_to_death values are replaced with the largest
        # days_to_death present in this subset.
        days = subset["days_to_death"]
        days = days.fillna(days.max())
        kmf.fit(days, subset["vital_status"], label=cancer_type)
        kmf.plot(ci_show=False)

コード例 #34

0

ファイルを表示

ファイル: HegemonUtil.py プロジェクト: sahoo00/Hegemon

def survival(time, status, pGroups=None):
  """Plot Kaplan-Meier curves, overall or per group, and return the axes.

  Args:
    time: indexable sequence of durations; entries equal to "" are skipped.
    status: indexable sequence of event flags aligned with ``time``; entries
      equal to "" are skipped.
    pGroups: optional sequence of ``(label, color, indices)`` entries. When
      given, one curve is drawn per non-empty group and a multivariate
      log-rank p-value is added as a legend title.

  Returns:
    The matplotlib axes holding the curves, or None when ``pGroups`` is given
    but no group has any usable sample.
  """
  kmf = KaplanMeierFitter()
  if pGroups is None:
    # Positions 0 and 1 are never used -- presumably header entries; TODO confirm.
    order = [i for i in range(2, len(time))
             if time[i] != "" and status[i] != ""]
    t = [float(time[i]) for i in order]
    s = [int(status[i]) for i in order]
    kmf.fit(t, s)
    return kmf.plot(color='red')

  ax = None
  # groups[i] holds the index of the group sample i was assigned to, or ""
  # when it belongs to no group; a later group overrides an earlier one.
  groups = ["" for _ in time]
  for k in range(len(pGroups)):
    order = [i for i in pGroups[k][2]
             if time[i] != "" and status[i] != ""]
    if not order:
      continue
    for i in order:
      groups[i] = k
    t = [float(time[i]) for i in order]
    s = [int(status[i]) for i in order]
    kmf.fit(t, s, label=pGroups[k][0])
    if ax is None:
      ax = kmf.plot(color=pGroups[k][1], ci_show=False, show_censors=True)
    else:
      ax = kmf.plot(ax=ax, color=pGroups[k][1], ci_show=False, show_censors=True)
  order = [i for i in range(len(groups)) if groups[i] != ""]
  if len(order) > 0:
    t = [float(time[i]) for i in order]
    s = [int(status[i]) for i in order]
    g = [int(groups[i]) for i in order]
    from lifelines.statistics import multivariate_logrank_test
    from matplotlib.legend import Legend
    res = multivariate_logrank_test(t, g, s)
    # An extra, entry-less legend is used purely to display the p-value.
    leg = Legend(ax, [], [], title="p = %.2g" % res.p_value,
                 loc='lower left', frameon=False)
    ax.add_artist(leg)
  return ax

コード例 #35

0

ファイルを表示

ファイル: Gene_expression_plot_each.py プロジェクト: yuwtsri/tcga-script

def kmplot(df_high, df_low, ax):
	"""Plot high/low Kaplan-Meier curves on ``ax`` and report the log-rank result.

	Args:
		df_high: frame with ``duration`` and ``event`` columns for the high group.
		df_low: frame with ``duration`` and ``event`` columns for the low group.
		ax: matplotlib axes to draw on.

	Returns:
		``(p_value, hm5, hm10, lm5, lm10)``: the log-rank p-value followed by the
		fitted survival predictions at times 60 and 120 for the high and then the
		low group. If fitting raises ValueError, the placeholder tuple
		``("NA", "0", "0", "0", "0")`` is returned and nothing is drawn.
	"""
	kmf_high = KaplanMeierFitter()
	kmf_low = KaplanMeierFitter()
	try:
		kmf_high.fit(durations = df_high.duration, event_observed = df_high.event, label = 'High: n = ' + str(len(df_high)))
		kmf_low.fit(durations = df_low.duration, event_observed = df_low.event, label = "Low: n = " + str(len(df_low)))
	except ValueError:
		return("NA", "0", "0", "0", "0")
	kmf_high.plot(ax = ax, color = "red", show_censors=True,  ci_show=False)
	kmf_low.plot(ax = ax, color = "black", show_censors=True, ci_show=False)
	statistics_result = logrank_test(df_high.duration, df_low.duration, event_observed_A = df_high.event, event_observed_B = df_low.event)
	p_value = statistics_result.p_value
	ax.set_xlabel('Time (months)')
	ax.set_ylabel('Probability')
	ax.text(0.95, 0.02, 'logrank P = ' + str('%.4f' % p_value), verticalalignment='bottom', horizontalalignment='right', transform=ax.transAxes,
		color = 'black', fontsize = 11)
	# Attach the legend to the axes we were given; the pyplot-level call targets
	# whichever axes happens to be current, which need not be ``ax``.
	ax.legend(loc=3)
	hm5 = kmf_high.predict(60)
	hm10 = kmf_high.predict(120)
	lm5 = kmf_low.predict(60)
	lm10 = kmf_low.predict(120)
	return(p_value, hm5, hm10, lm5, lm10)

コード例 #36

0

ファイルを表示

ファイル: Aalen_KMF_plots.py プロジェクト: williamtong/donor_lifetimes_non_profit

def plot_Kaplan_Meier_overall(donor_dataset):
	'''Fit a single Kaplan-Meier model over all donors and show it.

	Parameters:
	donor_dataset: Pandas dataframe containing at least the columns
	'Total_years' (used as the durations) and 'censored' (passed to the
	fitter as its second positional argument, i.e. the event-observed flag).

	Output:
	A Kaplan-Meier plot in a new 5x5 figure, displayed with plt.show().

	This function does not return anything.

	NOTE(review): if 'censored' is True for donors who are still active, it is
	the opposite of an event-observed flag and would need inverting before
	being handed to the fitter -- confirm against the dataset.
	'''
	years_active = donor_dataset['Total_years']
	flags = donor_dataset['censored']

	# Fit the estimator on the whole dataset under the label 'Overall'.
	fitter = KaplanMeierFitter()
	fitter.fit(years_active, flags, label = 'Overall')

	# Draw the curve on a fresh square figure and decorate the axes.
	axes = plt.figure(figsize=(5, 5)).add_subplot(111)
	fitter.plot(ax=axes)
	axes.set_xlabel('Years', size = 20)
	axes.set_ylabel('Surviving donor population', size = 20)
	axes.set_xlim(0,40)
	axes.set_ylim(0, 1)
	axes.grid()
	axes.legend(loc = 'best', fontsize = 20)
	plt.show()
	return

コード例 #37

0

ファイルを表示

ファイル: sa_test.py プロジェクト: zerchow/SurvivalAnalysisSystem

def get_sa(request):
    """Render ``sa_test.html`` with Kaplan-Meier and Nelson-Aalen plot images.

    The two images live under the parent directory of this module's directory.
    They are (re)generated from the Waltons dataset whenever at least one of
    them is missing.

    Args:
        request: the incoming HTTP request, passed through to ``render``.

    Returns:
        The response produced by ``render``; the context carries the two
        image paths under the keys ``kmf`` and ``naf``.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {'kmf': kmffile, 'naf': naffile}
    kmf_path = dirname + kmffile
    naf_path = dirname + naffile

    # Regenerate when EITHER image is missing; requiring both to be missing
    # would leave a single deleted image permanently absent.
    if not (os.path.exists(kmf_path) and os.path.exists(naf_path)):
        df = load_waltons()
        T = df['T']  # an array of durations
        E = df['E']  # a either boolean or binary array representing whether the 'death' was observed (alternatively an individual can be censored)
        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None)

        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate', alpha=None, ci_labels=None)

        # Each estimator gets its own figure so the second image does not also
        # contain the first curve, and every figure is closed so the server
        # process does not accumulate open figures.
        for fitter, path in ((kmf, kmf_path), (naf, naf_path)):
            fig = plt.figure()
            try:
                fitter.plot(ax=fig.add_subplot(111))
                fig.savefig(path)
            finally:
                plt.close(fig)

    return render(request=request, template_name='sa_test.html', context=context)

コード例 #38

0

ファイルを表示

ファイル: customer_churn.py プロジェクト: liruikaiyao/workshop

def surAnalysis(storeId):
    """Fit a Kaplan-Meier curve for one store and save it as an image.

    Records are read from the module-level ``survival`` collection, filtered by
    ``store_id``. Nothing is plotted when the store has no records.

    Args:
        storeId: store identifier; also used as the output file name.

    Returns:
        None
    """
    duration = []
    observed = []

    for elem in survival.find({'store_id': storeId}):
        # presumably seconds -> days; verify the unit of 'duration'
        duration.append(elem['duration']/86400)
        observed.append(elem['observed'])
    if not duration:
        return

    kmf = KaplanMeierFitter()
    kmf.fit(array(duration), array(observed))
    fig = kmf.plot().get_figure()
    try:
        # Backslashes are escaped explicitly: sequences such as "\w" or "\s"
        # are invalid escapes and only work by accident. The resulting path is
        # unchanged.
        fig.savefig('F:\\workshop\\lbs_lyf\\static\\images\\' + storeId)
    finally:
        plt.close(fig)

コード例 #39

0

ファイルを表示

ファイル: LifeClass.py プロジェクト: liruikaiyao/workshop

    def __init__(self, db, male=False, female=False, other=False, both=True):
        """Load all records from ``db`` and save a Kaplan-Meier plot.

        The overall curve (label 'both') is always drawn; per-gender curves are
        added for each flag that is exactly ``True``. The figure is saved as
        'maleAndFemale'.

        Args:
            db: collection-like object whose ``find()`` yields records with
                'duration', 'observed' and 'gender' keys.
            male: add a curve for records with gender code 1.
            female: add a curve for records with gender code 2.
            other: add a curve for records with gender code 0.
            both: stored on the instance only.
                NOTE(review): this flag does not currently control the overall
                curve -- confirm whether it should.
        """
        self.db = db
        self.male = male
        self.female = female
        self.other = other
        self.both = both

        duration = []
        observed = []
        group = []

        for elem in self.db.find():
            # presumably seconds -> days; verify the unit of 'duration'
            duration.append(elem['duration'] / 86400)
            observed.append(elem['observed'])
            group.append(elem['gender'])
        dura_obj = array(duration)
        obs_obj = array(observed)
        group_obj = array(group)

        kmf = KaplanMeierFitter()
        kmf.fit(dura_obj, obs_obj, label='both')
        ax = kmf.plot()

        # (enabled flag, gender code, curve label)
        subgroup_curves = (
            (self.male, 1, 'male'),
            (self.female, 2, 'female'),
            (self.other, 0, 'other'),
        )
        for enabled, gender_code, label in subgroup_curves:
            if enabled is True:
                mask = group_obj == gender_code
                kmf.fit(dura_obj[mask], obs_obj[mask], label=label)
                kmf.plot(ax=ax)
        ax.get_figure().savefig('maleAndFemale')

コード例 #40

0

ファイルを表示

ファイル: ExploreSeer.py プロジェクト: georgetown-analytics/envirohealth

    def plot_survival(self):
        """Show Kaplan-Meier curves for several patient splits, then duration histograms.

        Data comes from the parent class' ``load_data``. After recoding the
        RADIATN, ERSTATUS and PRSTATUS columns, five stacked panels are drawn
        (radiation, ER status, PR status, stage, age), each comparing the
        subgroups defined below. Durations are ``SRV_TIME_MON / 12`` and the
        event flag is ``STAT_REC == 4``. A second figure shows histograms of the
        durations split by ``STAT_REC != 4``.

        Returns:
            None
        """
        wanted_columns = ['YR_BRTH','AGE_DX','LATERAL','RADIATN','HISTREC','ERSTATUS','PRSTATUS','BEHANAL','HST_STGA','NUMPRIMS', 'SRV_TIME_MON', 'SRV_TIME_MON_PA', 'DTH_CLASS', 'O_DTH_CLASS', 'STAT_REC']
        df = super().load_data(col=wanted_columns,
                               cond='SRV_TIME_MON < 1000 AND HST_STGA < 8 AND DTH_CLASS < 9 AND ERSTATUS < 4 AND PRSTATUS < 4',
                               sample_size=100000)

        kmf = KaplanMeierFitter()

        # Best effort: fold RADIATN code 7 into 0 and keep only codes below 7;
        # any failure here is deliberately ignored.
        try:
            df.RADIATN = df.RADIATN.replace(7, 0)
            df = df[df.RADIATN < 7]
        except Exception:
            pass

        # Both status columns get the same treatment: drop codes 4 and 9, then
        # remap 2 -> 0, 1 -> 2, 3 -> 1 (the order of the three replacements
        # matters). Target coding per the original note:
        # 0-negative, 1-borderline, 2-positive.
        for status_col in ('ERSTATUS', 'PRSTATUS'):
            df = df[df[status_col] != 4]
            df = df[df[status_col] != 9]
            df[status_col] = df[status_col].replace(2, 0)
            df[status_col] = df[status_col].replace(1, 2)
            df[status_col] = df[status_col].replace(3, 1)

        # Boolean masks for the subgroups compared in each panel.
        rad = df.RADIATN > 0
        er = df.ERSTATUS > 0
        pr = df.PRSTATUS > 0
        stage_masks = [(df.HST_STGA == code, "Stage %d" % code) for code in (0, 1, 2, 4)]
        age = df.AGE_DX < 50

        df['SRV_TIME_YR'] = df['SRV_TIME_MON'] / 12
        T = df['SRV_TIME_YR']
        C = df.STAT_REC == 4

        # One list of (mask, label) curves per panel, top to bottom.
        panels = [
            [(rad, "Radiation"), (~rad, "No Radiation")],
            [(er, "ER Positive"), (~er, "ER Negative")],
            [(pr, "PR Positive"), (~pr, "PR Negative")],
            stage_masks,
            [(age, "Age < 50"), (~age, "Age >= 50")],
        ]

        f, ax = plt.subplots(5, sharex=True, sharey=True)
        ax[0].set_title("Lifespans of cancer patients")

        for panel_ax, curves in zip(ax, panels):
            for mask, label in curves:
                kmf.fit(T[mask], event_observed=C[mask], label=label)
                kmf.plot(ax=panel_ax)

        for panel_ax in ax:
            panel_ax.legend(loc=3, prop={'size': 10})

        ax[len(ax) - 1].set_xlabel('Survival in years')

        f.text(0.04, 0.5, 'Survival %', va='center', rotation='vertical')
        plt.tight_layout()

        plt.ylim(0, 1)
        plt.show()

        f, ax = plt.subplots(2, sharex=True, sharey=True)

        df.hist('SRV_TIME_YR', by=df.STAT_REC != 4, ax=(ax[0], ax[1]))
        upper, lower = ax[0], ax[1]
        upper.set_title('Histogram of Non Censored Patients')
        upper.set_ylabel('Number of Patients')
        lower.set_title('Histogram of Censored Patients')
        lower.set_ylabel('Number of Patients')
        lower.set_xlabel('Survival in Years')
        plt.show()

        return

        # NOTE: everything below is unreachable because of the return above; it
        # is kept as the disabled "second plot of survival".

        fig, ax = plt.subplots(figsize=(8, 6))

        cen = df[df.STAT_REC != 4].SRV_TIME_MON.sort_values()
        nc = df[df.STAT_REC == 4].SRV_TIME_MON.sort_values()

        ax.hlines(list(range(len(nc))), 0, nc, color='b', label='Uncensored')
        ax.hlines(list(range(len(nc), len(nc) + len(cen))), 0, cen, color='r', label='Censored')

        ax.set_xlim(left=0)
        ax.set_xlabel('Months')
        ax.set_ylim(-0.25, len(df) + 0.25)
        ax.legend(loc='best')
        plt.show()

        return

コード例 #41

0

ファイルを表示

ファイル: survival.py プロジェクト: Erin-Boehmer/p2p-loan-capstone

from lifelines import KaplanMeierFitter

import matplotlib.pyplot as plt

# Script: fit and display a Kaplan-Meier curve over the loans in joined.csv.bz2.
df = pd.read_csv('joined.csv.bz2', sep=',', compression='bz2', low_memory=False)

# Convert column 'term' to an integer. Note that str.strip(' months') removes
# any of the characters ' ', 'm', 'o', 'n', 't', 'h', 's' from both ends (it is
# not a suffix match), which leaves the digits in the middle.
df['term'] = df['term'].map(lambda x: int(x.strip(' months')))

# Duration column 'T': 'firstMissed' as a fraction of the loan term;
# loans whose status is 'Fully Paid' are assigned T = 1.
df['T'] = df['firstMissed'] / df['term']
df.loc[df['loan_status']=='Fully Paid', 'T']=1

# The event indicator 'E' is the negation of column 'censored'
# (assumes 'censored' is boolean -- TODO confirm).

T = df['T']
E = ~df['censored']


kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E) # more succinctly, kmf.fit(T,E)


# The two bare attribute accesses below have no effect when run as a script;
# they only display a value in an interactive session.
kmf.survival_function_
kmf.median_
kmf.plot()
plt.show()

コード例 #42

0

ファイルを表示

ファイル: decade_analysis.py プロジェクト: DataOrigami/senate-tenants

import matplotlib.pyplot as plt
import pandas as pd
from lifelines import KaplanMeierFitter
from lifelines.utils import datetimes_to_durations


# Script: one Kaplan-Meier curve per decade of 'start_date', all on one axes.
df = pd.read_csv('data/parl_data.csv')
df['start_date'] = pd.to_datetime(df['start_date'])
df['end_date'] = pd.to_datetime(df['end_date'])
# First three digits of the start year identify the decade, e.g. '199'.
df['decade'] = df['start_date'].map(lambda d: str(d.year)[:3])
T, C = datetimes_to_durations(df['start_date'], df['end_date'])
df['T'] = T
df['C'] = C

kmf = KaplanMeierFitter()

# Use pyplot explicitly: a bare subplot() only exists under a pylab-style
# star-import.
ax = plt.subplot(111)
for decade in df['decade'].unique():
    ix = df['decade'] == decade
    # .loc replaces the .ix indexer, which no longer exists in current pandas.
    kmf.fit(df.loc[ix, 'T'], df.loc[ix, 'C'], label=decade)
    if decade not in ('200', '199'):
        # Older decades are drawn as faint grey context lines.
        kmf.plot(ax=ax, c='#777777', ci_show=False, alpha = 0.5)
    else:
        kmf.plot(ax=ax, lw=4)

コード例 #43

0

ファイルを表示

ファイル: KaplanMeierDrugLitAssigment.py プロジェクト: griffincalme/ExperimentalScripts

#Griffin Calme
#Group 15, week 8 activity
#Kaplan Meier survival curve

import pandas as pd
from lifelines import KaplanMeierFitter
import matplotlib.pyplot as plt
kmf = KaplanMeierFitter()

# DataFrame.from_csv no longer exists in current pandas; read_csv with the
# first column as index and date parsing enabled is its documented equivalent.
df = pd.read_csv('wk8gp15KapMeier.csv', index_col=0, parse_dates=True)

print(df)

# Rows with Group == 2 form curve '2'; all remaining rows form curve '1'.
groups = df['Group']
ix = (groups == 2)

T = df['SERIAL TIME (years)']
E = df['STATUS']

kmf.fit(T[~ix], E[~ix], label='1')
ax = kmf.plot()

kmf.fit(T[ix], E[ix], label='2')
kmf.plot(ax=ax, ci_force_lines=False)

plt.show()

コード例 #44

0

ファイルを表示

ファイル: ToUser.py プロジェクト: liruikaiyao/workshop

    def data_fit(self):
        """Build per-user subscription durations and save a Kaplan-Meier plot.

        Every user with a 'subscribe' event is considered. The duration runs
        from the user's earliest event to either their latest event (when an
        'unsubscribe' event exists; observed = 1) or to the current time
        (observed = 0). A single overall curve is fitted once all users have
        been processed and saved as 'maleAndFemale'.

        Returns:
            None
        """
        self.hyd_events.create_index('FromUserName')
        self.hyd_events.create_index('Event')
        self.hyd_users.create_index('openid')

        user_list = list(set(
            elem['FromUserName']
            for elem in self.hyd_events.find({'Event': 'subscribe'})))
        print(len(user_list))
        now_time = time.time()

        duration = []
        observed = []
        group = []

        for elem in user_list:
            # Event times must be collected per user; sharing one list across
            # users would mix every earlier user's events into min()/max().
            time_block = [item['CreateTime']
                          for item in self.hyd_events.find({'FromUserName': elem})]
            sub_time = int(min(time_block))
            curt = self.hyd_events.find_one(
                {'$and': [{'FromUserName': elem}, {'Event': 'unsubscribe'}]})
            if curt is None:
                # Still subscribed: censored at the current time.
                unsub_time = int(now_time)
                was_observed = 0
            else:
                unsub_time = int(max(time_block))
                was_observed = 1

            # Gender is looked up (0 = unknown when no user record exists) but
            # is not used by the single overall curve plotted below.
            check = self.hyd_users.find_one({'openid': elem})
            try:
                group.append(check['sex'])
            except TypeError:
                group.append(0)

            # presumably seconds -> days; verify the unit of 'CreateTime'
            duration.append(abs(unsub_time - sub_time) / 86400)
            observed.append(was_observed)

        if not duration:
            # No subscribers: nothing to fit or plot.
            return

        # Fit and save once, after all users are collected, instead of
        # refitting and re-plotting after every single user.
        kmf = KaplanMeierFitter()
        kmf.fit(array(duration), array(observed), label='both')
        ax = kmf.plot()
        ax.get_figure().savefig('maleAndFemale')

コード例 #45

0

ファイルを表示

ファイル: LifePlot.py プロジェクト: liruikaiyao/workshop

# Script: Kaplan-Meier curves for male users, female users and all users,
# saved to a single image.
duration = []
observed = []
group = []

# Collect one duration / observed flag / gender code per record.
for elem in after_users.find():
    #if elem['duration'] >=1500000:
    # presumably seconds -> days; verify the unit of 'duration'
    duration.append(elem['duration']/86400)
    observed.append(elem['observed'])
    group.append(elem['gender'])
dura_obj = array(duration)
obs_obj = array(observed)
group_obj = array(group)
# NOTE: the two DataFrame objects below are built and immediately discarded.
DataFrame(dura_obj,index=group_obj)
DataFrame(obs_obj,index=group_obj)
# Boolean masks by gender code: 1 = male, 2 = female, 0 = other.
male = group_obj ==1
female = group_obj ==2
other = group_obj ==0

# The same fitter is refitted for each subgroup and drawn onto shared axes.
kmf = KaplanMeierFitter()
kmf.fit(dura_obj[male],obs_obj[male], label = 'male')
ax = kmf.plot()
kmf.fit(dura_obj[female],obs_obj[female], label = 'female')
kmf.plot(ax=ax)
kmf.fit(dura_obj,obs_obj, label = 'both')
kmf.plot(ax=ax)
#kmf.fit(dura_obj[other],obs_obj[other], label = 'other')
#kmf.plot(ax=ax)
#ax.set_xlim(19,22)
#ax.set_ylim(1,2)
ax.get_figure().savefig('maleAndFemale_both_17day')

コード例 #46

0

ファイルを表示

ファイル: survival.py プロジェクト: ScienceDuck/TCGA-analysis

def plot_survival(unique_groups, grouped_data, analysis_type, censors, ci, showplot, stat_results, time='Months'):
	"""Plot one Kaplan-Meier curve per group and log survival-time quantiles.

	For every group a row of q-th survival times (q = 95%, 90%, ..., 0%) is
	appended to 'Kaplan_<analysis_type>.txt'. The figure is saved as both
	'Kaplan_<analysis_type>.png' and '.eps'.

	Args:
		unique_groups: sequence of group keys, in plotting order.
		grouped_data: object whose ``get_group(key)`` returns a frame with
			'survival' (in days) and 'event' columns.
		analysis_type: used in file names; 'survival' selects the
			'Overall Survival' y-label, anything else 'Relapse-Free Survival'.
		censors: forwarded to the fitter's plot as ``show_censors``.
		ci: forwarded to the fitter's plot as ``ci_show``.
		showplot: when equal to True, display the figure before closing it.
		stat_results: object with a ``p_value`` attribute; only used when there
			are exactly two groups.
		time: 'Months' or 'Years' (case-insensitive) rescales the durations;
			any other value leaves them in days.

	Returns:
		None
	"""
	kmf = KaplanMeierFitter()
	fig, ax = plt.subplots()
	n_in_groups = []
	percentiles = range(95, -1, -5)
	unit = time.lower()

	# The context manager guarantees the quantile log is flushed and closed
	# even if fitting or plotting fails part-way through.
	with open('Kaplan_%s.txt' % (analysis_type), 'a') as f:
		f.write("\nPercent %s\n" % analysis_type)
		headers = "Group\t" + "".join(str(x) + "%\t" for x in percentiles)
		f.write("%s\n" % headers)

		for group in unique_groups:
			data = grouped_data.get_group(group)
			n_in_groups.append(len(data))
			# Adjust survival data from days to whatever form wanted
			if unit == 'months':
				survival_time = (data['survival']/(365/12))
			elif unit == 'years':
				survival_time = (data['survival']/(365))
			else:
				survival_time = data['survival']
			kmf.fit(survival_time, data['event'], label = group)

			f.write("%s\t" % group)
			for x in percentiles:
				f.write(str(qth_survival_times(x/100, kmf.survival_function_)) + "\t")
			f.write("\n")

			kmf.plot(ax=ax, show_censors=censors, ci_show=ci, linewidth=2.5)

	# Make the graph pretty!
	labels = dict(horizontalalignment = 'center', verticalalignment = 'center', fontname = 'Arial', fontsize = 28)

	ax.grid(False)
	ax.set_ylim(0,1.05)
	for side in ('left', 'right', 'top', 'bottom'):
		ax.spines[side].set_linewidth(2.5)
	ax.yaxis.set_tick_params(width=2.5)
	ax.xaxis.set_tick_params(width=2.5)
	ax.xaxis.set_ticks_position('bottom')
	ax.yaxis.set_ticks_position('left')

	plt.xlabel('%s Post-Diagnosis' % time, labels, labelpad = 20)
	if analysis_type == 'survival':
		plt.ylabel('Overall Survival', labels, labelpad = 20)
	else:
		plt.ylabel('Relapse-Free Survival', labels, labelpad=20)
	plt.xticks(fontname = 'Arial', fontsize = 24)
	plt.yticks(fontname = 'Arial', fontsize = 24)
	ax.tick_params(axis='y', pad=10)
	ax.tick_params(axis='x', pad=10)

	# Legend entries are rewritten to include each group's sample size; this
	# relies on the legend listing the curves in plotting order.
	legend = ax.legend(frameon=False,loc=3)
	for label, group, n_in_group in zip(legend.get_texts(), unique_groups, n_in_groups):
		label.set_fontsize(20)
		label.set_text('%s   n=%d' % (group, n_in_group))

	if len(unique_groups) == 2:
		plt.text(0.95, 0.05, 'p = %.2g' % (stat_results.p_value), fontname='Arial', fontsize=20, ha='right', transform=ax.transAxes)

	plt.tight_layout()

	fig.savefig('Kaplan_%s.png' % analysis_type, transparent = True)
	fig.savefig('Kaplan_%s.eps' % analysis_type, transparent = True)
	# Kept as an equality test so only True (or 1) triggers the display.
	if showplot == True:
		plt.show()
	plt.close(fig)

コード例 #47

0

ファイルを表示

ファイル: survival.py プロジェクト: hammerlab/cohorts

def _plot_kmf_single(df,
                     condition_col,
                     survival_col,
                     censor_col,
                     threshold,
                     title,
                     xlabel,
                     ylabel,
                     ax,
                     with_condition_color,
                     no_condition_color,
                     with_condition_label,
                     no_condition_label,
                     color_map,
                     label_map,
                     color_palette,
                     ci_show,
                     print_as_title):
    """
    Helper function to produce a single KM survival plot, among observations in df by groups defined by condition_col.

    All inputs are required - this function is intended to be called by `plot_kmf`.

    Groups are formed in one of three ways:
    - `threshold` given: rows are split on `df[condition_col] > threshold`
      (`threshold == "median"` uses the column median);
    - object/category column: one group per distinct value;
    - boolean column: one group per truth value.
    Any other dtype raises ValueError.

    Returns a results object (log-rank test for exactly two groups, a
    `NullSurvivalResults` for one group, otherwise the fitted `CoxPHFitter`)
    annotated with `survival_data_series`, `event_data_series` and `desc`.
    """
    # make color inputs consistent hex format
    if colors.is_color_like(with_condition_color):
        with_condition_color = colors.to_hex(with_condition_color)
    if colors.is_color_like(no_condition_color):
        no_condition_color = colors.to_hex(no_condition_color)
    ## prepare data to be plotted; producing 3 outputs:
    # - `condition`, series containing category labels to be plotted
    # - `label_map` (mapping condition values to plot labels)
    # - `color_map` (mapping condition values to plotted colors)
    if threshold is not None:
        is_median = threshold == "median"
        if is_median:
            threshold = df[condition_col].median()
        label_suffix = float_str(threshold)
        condition = df[condition_col] > threshold
        default_label_no_condition = "%s ≤ %s" % (condition_col, label_suffix)
        if is_median:
            label_suffix += " (median)"
        default_label_with_condition = "%s > %s" % (condition_col, label_suffix)
        with_condition_label = with_condition_label or default_label_with_condition
        no_condition_label = no_condition_label or default_label_no_condition
        if not label_map:
            label_map = {False: no_condition_label,
                         True: with_condition_label}
        if not color_map:
            color_map = {False: no_condition_color,
                         True: with_condition_color}
    elif df[condition_col].dtype == 'O' or df[condition_col].dtype.name == "category":
        condition = df[condition_col].astype("category")
        if not label_map:
            label_map = {
                condition_value: '{} = {}'.format(condition_col, condition_value)
                for condition_value in condition.unique()
            }
        if not color_map:
            rgb_values = sb.color_palette(color_palette, len(label_map.keys()))
            hex_values = [colors.to_hex(col) for col in rgb_values]
            color_map = dict(zip(label_map.keys(), hex_values))
    elif df[condition_col].dtype == 'bool':
        condition = df[condition_col]
        default_label_with_condition = "= {}".format(condition_col)
        default_label_no_condition = "¬ {}".format(condition_col)
        with_condition_label = with_condition_label or default_label_with_condition
        no_condition_label = no_condition_label or default_label_no_condition
        if not label_map:
            label_map = {False: no_condition_label,
                         True: with_condition_label}
        if not color_map:
            color_map = {False: no_condition_color,
                         True: with_condition_color}
    else:
        # Built without a backslash continuation inside the literal, which
        # would embed the next line's indentation in the message.
        raise ValueError("Don't know how to plot data of type {}".format(
            df[condition_col].dtype))

    # produce kmf plot for each category (group) identified above
    kmf = KaplanMeierFitter()
    grp_desc = list()
    grp_survival_data = dict()
    grp_event_data = dict()
    grp_names = list(condition.unique())
    for grp_name, grp_df in df.groupby(condition):
        grp_survival = grp_df[survival_col]
        grp_event = (grp_df[censor_col].astype(bool))
        grp_label = label_map[grp_name]
        grp_color = color_map[grp_name]
        kmf.fit(grp_survival, grp_event, label=grp_label)
        desc_str = "# {}: {}".format(grp_label, len(grp_survival))
        grp_desc.append(desc_str)
        grp_survival_data[grp_name] = grp_survival
        grp_event_data[grp_name] = grp_event
        # The first curve creates the axes when the caller supplied none;
        # later curves reuse the axes returned by the previous plot call.
        if ax is not None:
            ax = kmf.plot(ax=ax, show_censors=True, ci_show=ci_show, color=grp_color)
        else:
            ax = kmf.plot(show_censors=True, ci_show=ci_show, color=grp_color)

    ## format the plot
    # Set the y-axis to range 0 to 1
    ax.set_ylim(0, 1)
    y_tick_vals = ax.get_yticks()
    ax.set_yticklabels(["%d" % int(y_tick_val * 100) for y_tick_val in y_tick_vals])
    # plot title
    if title:
        ax.set_title(title)
    elif print_as_title:
        ax.set_title(' | '.join(grp_desc))
    else:
        for desc in grp_desc:
            print(desc)
    # axis labels
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    ## summarize analytical version of results
    ## again using same groups as are plotted
    if len(grp_names) == 2:
        # use log-rank test for 2 groups
        results = logrank_test(grp_survival_data[grp_names[0]],
                               grp_survival_data[grp_names[1]],
                               event_observed_A=grp_event_data[grp_names[0]],
                               event_observed_B=grp_event_data[grp_names[1]])
    elif len(grp_names) == 1:
        # no analytical result for a single group
        results = NullSurvivalResults()
    else:
        # cox PH fitter for >2 groups
        cf = CoxPHFitter()
        cox_df = patsy.dmatrix('+'.join([condition_col, survival_col,
                                         censor_col]),
                               df, return_type='dataframe')
        del cox_df['Intercept']
        results = cf.fit(cox_df, survival_col, event_col=censor_col)
        results.print_summary()
    # add metadata to results object so caller can print them
    results.survival_data_series = grp_survival_data
    results.event_data_series = grp_event_data
    results.desc = grp_desc
    return results

コード例 #48

0

ファイルを表示

ファイル: kaplanmeier.py プロジェクト: ctwardy/SARbayes-fork

def execute():
    """Plot Kaplan-Meier survival curves for the most common categories.

    Loads incident data from the SQLite database, derives the incident
    duration in days and a "doa" flag (the negation of ``survived``), drops
    rows with a negative duration, and then draws:

    * a 2x2 grid with one panel for each of the four most frequent
      categories, every panel holding one curve for groups (size > 1) and
      one for singles (size == 1), saved as ``km-grid-large.svg``;
    * a combined figure overlaying the fitter stored for each panel, saved
      as ``km-combined-large.svg``.

    Both figures are finally shown with ``plt.show()``.
    """
    matplotlib.rc("font", size=20)

    engine, session = database.initialize("sqlite:///../data/isrid-master.db")
    try:
        # Query with Group.size may take awhile, at least for Charles
        # Not sure why
        query = session.query(Incident.total_hours, Subject.survived, Group.category, Group.size).join(Group, Subject)
        print("Tabulating query... may take awhile for unknown reasons.")
        df = tabulate(query)
        print("Done tabulating.")
        print(df.describe())
    finally:
        # Release the database handle even if the query or tabulation fails.
        database.terminate(engine, session)

    df = df.assign(
        days=[total_hours.total_seconds() / 3600 / 24 for total_hours in df.total_hours],
        doa=[not survived for survived in df.survived],
    )
    df = df[0 <= df.days]

    rows, columns = 2, 2
    grid, axes = plt.subplots(rows, columns, figsize=(15, 10))

    kmfs = []
    options = {"show_censors": True, "censor_styles": {"marker": "|", "ms": 6}, "censor_ci_force_lines": False}

    top_categories = Counter(df.category).most_common()[: rows * columns]
    for plot, (category, _count) in enumerate(top_categories):
        print("Category:", category)
        ax = axes[plot // columns, plot % columns]
        df_ = df[df.category == category]
        N, Ndoa = len(df_), sum(df_.doa)
        Srate = 100 * (1 - Ndoa / N)

        # ``df_.size`` is the DataFrame attribute (total number of elements),
        # not the column, so the column has to be selected by key.
        # Assumes tabulate() names the Group.size column "size", like the
        # other columns that are read by attribute here -- TODO confirm.
        group_size = df_["size"]
        grp = df_[group_size > 1]
        sng = df_[group_size == 1]

        # NOTE(review): the same fitter is refit for the singles, so only the
        # "Singles" fit is kept in ``kmfs`` and reaches the combined figure.
        # Confirm that this is intended.
        kmf = KaplanMeierFitter()
        kmf.fit(grp.days, event_observed=grp.doa, label=category + " Groups")
        kmf.plot(ax=ax, **options)
        kmf.fit(sng.days, event_observed=sng.doa, label=category + " Singles")
        kmf.plot(ax=ax, **options)
        kmfs.append(kmf)

        ax.set_xlim(0, min(30, 1.05 * ax.get_xlim()[1]))
        ax.set_ylim(0, 1)
        ax.set_title("{}, N = {}, DOA = {}, {:.0f}% surv".format(category, N, Ndoa, Srate))
        ax.set_xlabel("Total Incident Time (days)")
        ax.set_ylabel("Probability of Survival")

    grid.suptitle("Kaplan-Meier Survival Curves", fontsize=25)
    grid.tight_layout()
    # Leave room above the panels for the figure-level title.
    grid.subplots_adjust(top=0.9)
    grid.savefig("../doc/figures/kaplan-meier/km-grid-large.svg", transparent=True)

    combined = plt.figure(figsize=(15, 10))
    ax = combined.add_subplot(1, 1, 1)
    # ``kmfs`` holds at most rows * columns fitters, one per grid panel.
    for kmf in kmfs:
        kmf.plot(ci_show=False, show_censors=True, censor_styles={"marker": "|", "ms": 6}, ax=ax)

    ax.set_xlim(0, 15)
    ax.set_ylim(0, 1)
    ax.set_xlabel("Total Incident Time (days)")
    ax.set_ylabel("Probability of Survival")
    ax.set_title("Kaplan-Meier Survival Curves", fontsize=25)
    ax.grid(True)
    combined.savefig("../doc/figures/kaplan-meier/km-combined-large.svg", transparent=True)

    plt.show()

コード例 #49

0

ファイルを表示

ファイル: survival.py プロジェクト: arahuja/cohorts

def plot_kmf(df,
             condition_col,
             censor_col,
             survival_col,
             threshold=None,
             title=None,
             xlabel=None,
             ax=None,
             print_as_title=False):
    """
    Plot survival curves by splitting the dataset into two groups based on
    condition_col, and compare the two groups with a log-rank test.

    if threshold is defined, the groups are split based on condition_col
    being > threshold or not

    if threshold == 'median', the threshold is set to the median of
    condition_col

    if threshold is None, condition_col is cast to bool and used directly
    as the group membership mask

    Parameters
    ----------
        df: dataframe
        condition_col: string, column which contains the condition to split on
        censor_col: string, column which is cast to bool and passed to the
                            fitter and the log-rank test as the
                            event-observed indicator
        survival_col: string, column which contains the survival time
        threshold: int or string, if int, condition_col is thresholded,
                                  if 'median', condition_col thresholded
                                  at its median
        title: Title for the plot, default None
        xlabel: x-axis label for the plot, default None (label left as is)
        ax: an existing matplotlib ax, optional, default None
        print_as_title: bool, optional, whether or not to print text
          within the plot's title vs. stdout, default False

    Returns
    -------
        the object returned by logrank_test for the two groups
    """
    if threshold is not None:
        if threshold == 'median':
            threshold = df[condition_col].median()
        condition = df[condition_col] > threshold
        label = '{} > {}'.format(condition_col, threshold)
    else:
        # Cast to bool so that a 0/1 column works as a row mask and `~`
        # below is a logical rather than a bitwise negation.
        condition = df[condition_col].astype(bool)
        label = '{}'.format(condition_col)

    df_with_condition = df[condition]
    df_no_condition = df[~condition]
    survival_no_condition = df_no_condition[survival_col]
    survival_with_condition = df_with_condition[survival_col]

    event_no_condition = df_no_condition[censor_col].astype(bool)
    event_with_condition = df_with_condition[censor_col].astype(bool)

    kmf = KaplanMeierFitter()
    kmf.fit(survival_no_condition, event_no_condition, label="")
    if ax is not None:
        kmf.plot(ax=ax, show_censors=True, ci_show=False)
    else:
        # No axes supplied: reuse the one returned by the first plot call
        # so that both curves end up on the same axes.
        ax = kmf.plot(show_censors=True, ci_show=False)

    kmf.fit(survival_with_condition, event_with_condition, label=label)
    kmf.plot(ax=ax, show_censors=True, ci_show=False)

    # Set the y-axis to range 0 to 1
    ax.set_ylim(0, 1)

    # Group sizes are reported either in the title or on stdout, unless an
    # explicit title was given.
    no_cond_str = "# no condition {}".format(len(survival_no_condition))
    cond_str = "# with condition {}".format(len(survival_with_condition))
    if title:
        ax.set_title(title)
    elif print_as_title:
        ax.set_title("%s | %s" % (no_cond_str, cond_str))
    else:
        print(no_cond_str)
        print(cond_str)

    if xlabel:
        ax.set_xlabel(xlabel)

    results = logrank_test(survival_no_condition,
                           survival_with_condition,
                           event_observed_A=event_no_condition,
                           event_observed_B=event_with_condition)
    return results

コード例 #50

0

ファイルを表示

ファイル: kmna.py プロジェクト: xcodevn/SADP

# Report how many points were dropped between `data_` and `data`.
print("[*] Remove #%d outliers" % (len(data_) - len(data)))
# NOTE(review): this counts `df`, while the message above and the fits below
# are based on `data` -- confirm that `df` is the intended source.
N  = len(df) # number of data points

from lifelines import KaplanMeierFitter
from lifelines import NelsonAalenFitter

# `data` is a sequence of pairs; the first components are used as durations
# (T) and the second components as the event-observed flags (E).
kmf = KaplanMeierFitter()
(T, E) = zip(*data)
kmf.fit(T, event_observed=E)
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)

# Left panel: Nelson-Aalen fit; right panel: Kaplan-Meier fit.
ax = pyplot.subplot(121)
naf.plot(ax=ax)

ax = pyplot.subplot(122)
kmf.plot(ax=ax)

# Call syntax instead of the Python 2 print statement: consistent with the
# print call at the top of the script and valid on Python 3 as well.
print(naf.cumulative_hazard_)
naf.cumulative_hazard_.to_csv("naf.csv")

pyplot.show()

# Split the first components by the value of the second one (0 or 1).
data0  = [ a for (a,b) in data if b == 0 ]
data1  = [ a for (a,b) in data if b == 1 ]

# Histogram each subset over the range [config.GAMMA, 1].
his0,bin_edges0 = np.histogram(data0, bins=bins0, range=(config.GAMMA, 1))
his1,bin_edges1 = np.histogram(data1, bins=bins1, range=(config.GAMMA, 1))

# Concatenate both histograms into a single count vector.
his = np.append(his0, his1)

ps = []