Example 1
def test_pairwise_allows_dataframes():
    N = 100
    df = pd.DataFrame(np.empty((N, 3)), columns=["T", "C", "group"])
    df["T"] = np.random.exponential(1, size=N)
    df["C"] = np.random.binomial(1, 0.6, size=N)
    df["group"] = np.random.binomial(2, 0.5, size=N)
    stats.pairwise_logrank_test(df['T'], df["group"], event_observed=df["C"])
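For reference, a minimal sketch (not part of the original example) showing how the object returned by pairwise_logrank_test can be inspected, assuming a recent lifelines release where it returns a single StatisticalResult:

import numpy as np
import pandas as pd
from lifelines.statistics import pairwise_logrank_test

np.random.seed(0)
N = 100
df = pd.DataFrame({
    "T": np.random.exponential(1, size=N),        # durations
    "C": np.random.binomial(1, 0.6, size=N),      # 1 = event observed, 0 = censored
    "group": np.random.binomial(2, 0.5, size=N),  # three groups: 0, 1, 2
})

result = pairwise_logrank_test(df["T"], df["group"], event_observed=df["C"])
print(result.summary)   # one row per pair of groups, with test statistic and p-value
result.print_summary()  # formatted version of the same table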
Example 3
    def significance_results(self,
                             dataframe,
                             durations,
                             group,
                             event_observed=None):
        """
        Fit the model to right censored data and plot survival function
        Parameters
        ----------
          dataframe: Pandas dataframe being fit. Should have columns with durations, group and event observed.
          durations: Time to event for data point. For example, if the event is defined as "when supply for a clothing line is over" and say Women's size Small Tshirt supplies last for 45 days, then duration is 45.
          group: Column of dataframe that represents strata of dataframe (such as clusters or customer segments)
          event_observed: Whether event was observed (True) or not (False).
        Returns
        -------
        Table showing significance results at the 0.05 level.
        """
        df_pairwise = pd.DataFrame()
        if event_observed is not None:
            df_pairwise = pairwise_logrank_test(
                dataframe[durations], dataframe[group],
                dataframe[event_observed]).summary[['p']]
        else:
            df_pairwise = pairwise_logrank_test(
                dataframe[durations], dataframe[group]).summary[['p']]
        df_pairwise.columns = ['p-value']
        df_pairwise = df_pairwise.style.applymap(color_negative_red)

        return df_pairwise
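A hypothetical call, for orientation only: the enclosing class and the color_negative_red styling helper are not shown in the snippet above, so the object and column names below are assumptions.

# Hypothetical usage sketch; `analysis` stands for an instance of the (unshown) class
# that defines significance_results, and the column names come from the caller's data.
# styled = analysis.significance_results(
#     dataframe=customer_df,        # assumed DataFrame
#     durations="duration",         # column with time-to-event values
#     group="segment",              # column with the strata labels
#     event_observed="churned",     # column with 1/0 event indicators
# )
# styled                            # renders the colour-coded p-value table in a notebook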
def do_KM_analysis(durations, groups, events, group_labels, xlabel=None):
    fitters = list()
    ax_list = list()
    sns.set(palette = "colorblind", font_scale = 1.35, rc = {"figure.figsize": (8, 6), "axes.facecolor": ".92"})
    
    for i, cl in enumerate(sorted(set(groups))):
        kmf = KaplanMeierFitter()
        kmf.fit(durations[groups == cl], events[groups == cl], label=group_labels[i])
        fitters.append(kmf)
        if i == 0:
            ax_list.append(kmf.plot(ci_show=False))
        elif i == len(group_labels)-1:
            kmf.plot(ax=ax_list[-1], ci_show=False)
        else:
            ax_list.append(kmf.plot(ax=ax_list[-1], ci_show=False))
        
    add_at_risk_counts(*fitters, labels=group_labels)
    ax_list[-1].set_ylim(0,1.1)
    if xlabel is not None:
        ax_list[-1].set_xlabel(xlabel)

    multi = multivariate_logrank_test(durations, groups, events)
    ax_list[-1].text(0.1, 0.01, 'P-value=%.3f'% multi.p_value)
    
    if len(set(groups)) > 2:
        pair = pairwise_logrank_test(durations, groups, events)
        pair.print_summary()
    
    plt.show()
    
    return kmf
Example 5
def test_pairwise_logrank_test_with_identical_data_returns_inconclusive():
    t = np.random.exponential(10, size=100)
    T = np.tile(t, 3)
    g = np.array([1, 2, 3]).repeat(100)
    R = stats.pairwise_logrank_test(T, g, alpha=0.99).applymap(
        lambda r: r.is_significant if r is not None else None)
    V = np.array([[None, False, False], [False, None, False],
                  [False, False, None]])
    npt.assert_array_equal(R.values, V)
def test_pairwise_allows_dataframes_and_gives_correct_counts():
    N = 100
    N_groups = 5
    df = pd.DataFrame(np.empty((N, 3)), columns=["T", "C", "group"])
    df["T"] = np.random.exponential(1, size=N)
    df["C"] = np.random.binomial(1, 0.6, size=N)
    df["group"] = np.tile(np.arange(N_groups), 20)
    R = stats.pairwise_logrank_test(df["T"], df["group"], event_observed=df["C"])
    assert R.summary.shape[0] == N_groups * (N_groups - 1) / 2
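The assertion counts unordered group pairs: with N_groups = 5 there are 5 * 4 / 2 = 10 pairs, so the summary is expected to have 10 rows.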
def comparisons(df, print=True):
    res = pairwise_logrank_test(
        event_durations=df.time,
        event_observed=df.spotted,
        groups=df.domain_system,
    )

    if print:
        res.print_summary()

    return res
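A toy smoke test for comparisons (not from the original source): the column names time, spotted and domain_system are taken from the function body above, and the data below is made up purely for illustration.

import numpy as np
import pandas as pd

# Toy data with the columns that comparisons() reads.
toy = pd.DataFrame({
    "time": np.random.exponential(5, size=90),
    "spotted": np.random.binomial(1, 0.7, size=90),
    "domain_system": np.repeat(["a", "b", "c"], 30),
})

res = comparisons(toy, print=False)   # suppress the printed summary
assert res.summary.shape[0] == 3      # three group pairs: (a, b), (a, c), (b, c)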
Example 8
def kaplanmeier_stats_multi(dataframe, grouping, analysis_type):
    results = pairwise_logrank_test(
        event_durations=dataframe['survival'],
        groups=dataframe[grouping],
        event_observed=dataframe['event'],
        alpha=0.95,
        t_0=-1,
        bonferroni=True)
    with open('Kaplan_%s.txt' % (analysis_type), 'w') as f:
        f.write('Test\tP-Value\tSignificant\n')
        for x in results:
            for y in results[x]:
                # print(y)
                if y is not None:
                    # print(y.test_name)
                    # print(y.p_value)
                    # print(y.is_significant)
                    f.write('%s\t%4g\t%s\n' % (y.test_name, y.p_value, y.is_significant))
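The function above uses an older lifelines API that returned a matrix of per-pair results. Against a recent lifelines release, where pairwise_logrank_test returns a single StatisticalResult, roughly the same tab-separated table could be written from the summary DataFrame; a sketch (any multiple-comparison correction would then be applied separately, since the bonferroni argument is no longer part of the call):

# Sketch of a modern equivalent (assumes a recent lifelines release); dataframe,
# grouping and analysis_type are the same variables as in the function above.
from lifelines.statistics import pairwise_logrank_test

results = pairwise_logrank_test(
    event_durations=dataframe['survival'],
    groups=dataframe[grouping],
    event_observed=dataframe['event'],
)
results.summary[['test_statistic', 'p']].to_csv(
    'Kaplan_%s.txt' % analysis_type, sep='\t')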
Example 9
def aggragate_clusters_on_pvalue(clusters, survival, status):
    new_clusters = clusters.copy()
    p_values = []
    many = []
    layers = []
    numbers_at_risk = []
    while len(np.unique(new_clusters)) > 1:
        numbers_at_risk.append([])
        res_pair = pairwise_logrank_test(survival, new_clusters, status)
        pairs = {}
        for i in range(res_pair.shape[0]):
            for j in range(i + 1, res_pair.shape[0]):
                pairs[res_pair.index[i],
                      res_pair.columns[j]] = res_pair.iloc[i, j].p_value
        sorted_x = sorted(pairs.items(), key=operator.itemgetter(1))
        e = sorted_x[-1]
        aux = new_clusters.copy()
        aux[np.where(new_clusters == e[0][1])[0]] = e[0][0]
        layers.append(aux)
        new_clusters = aux.copy()
        res = multivariate_logrank_test(survival, new_clusters, status)
        many.append(len(np.unique(new_clusters)))
        p_values.append(res.p_value)

    return zip(layers, many, p_values)
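In other words, the loop above repeatedly merges the pair of clusters whose pairwise log-rank p-value is largest (the two least distinguishable clusters), and after each merge records the relabelled clustering, the number of remaining clusters, and the multivariate log-rank p-value.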
Example 10
plt.xticks(np.arange(0, 30, step = 5))
plt.title("Time spent on Billboard Hot 100 Chart before first exit")
plt.savefig('Billboard Hot 100 - Kaplan-Meier Plot by Year 2014 to 2017.png')
plt.show()   
   
#test if the years 2017 and 2016 are different to a statistically significant level
results_2017_2016 = logrank_test(recent_years['Total Weeks in First Appearance'][year_mask_2017], recent_years['Total Weeks in First Appearance'][year_mask_2016], event_observed_A=recent_years['Event Observed'][year_mask_2017], event_observed_B=recent_years['Event Observed'][year_mask_2016])
#results_2017_2016.print_summary()
#The p-value is 0.2495, so the difference between 2017 and 2016 is not statistically significant

#test if the years 2016 and 2015 are different to a statistically significant level
results_2016_2015 = logrank_test(recent_years['Total Weeks in First Appearance'][year_mask_2016], recent_years['Total Weeks in First Appearance'][year_mask_2015], event_observed_A=recent_years['Event Observed'][year_mask_2016], event_observed_B=recent_years['Event Observed'][year_mask_2015])
#results_2016_2015.print_summary()
#The p-value is 0.5160, so the difference between 2016 and 2015 is not statistically significant

results_pw = pairwise_logrank_test(recent_years['Total Weeks in First Appearance'], recent_years['Entry Year'], recent_years['Event Observed'])

print(results_pw.iloc[0,1])
print(results_pw.iloc[0,2])
print(results_pw.iloc[0,3])
print(results_pw.iloc[1,2])
print(results_pw.iloc[1,3])
print(results_pw.iloc[2,3])
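The iloc calls above assume the older matrix-style return value. With a recent lifelines release, where pairwise_logrank_test returns a single StatisticalResult, the per-pair p-values would instead be read from the summary, e.g.:

# Sketch assuming a recent lifelines API; same inputs as above.
print(results_pw.summary['p'])   # one p-value per pair of entry years
results_pw.print_summary()       # formatted table with test statistics and p-values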


#EXAMINE DATA BY DECADE
plt.figure(3)
ax = plt.subplot(111)

year_mask_201 = (by_song['Entry Decade'] == 201)
kmf_201 = KaplanMeierFitter()
Example 11
def test_pairwise_waltons_dataset_is_significantly_different():
    waltons_dataset = load_waltons()
    R = stats.pairwise_logrank_test(waltons_dataset['T'],
                                    waltons_dataset['group'])
    assert R.values[0, 1].is_significant
Example 12
def do_cum_incidence(gtdf, label):
    # Calculate cumulative wing extension since last laser on.
    # (This is ugly...)

    gtdf['cum_wei_this_trial'] = 0
    gtdf['head_trial'] = 0
    gtdf['thorax_trial'] = 0
    obj_ids = gtdf['obj_id'].unique()
    for obj_id in obj_ids:
        prev_laser_state = 0
        cur_cum_this_trial = 0
        cur_head_trial = 0
        cur_thorax_trial = 0
        cur_ttm = 0
        cur_state = 'none'
        prev_t = -np.inf
        for rowi in range(len(gtdf)):
            row = gtdf.iloc[rowi]
            if row['obj_id'] != obj_id:
                continue
            assert row['t'] > prev_t
            prev_t = row['t']
            if prev_laser_state == 0 and row['laser_state']:
                # new laser pulse, reset cum
                cur_cum_this_trial = 0
                if row['ttm'] < 0:
                    cur_head_trial += 1
                    cur_state = 'head'
                elif row['ttm'] > 0:
                    cur_thorax_trial += 1
                    cur_state = 'thorax'
            prev_laser_state = row['laser_state']
            if row['zx'] > 0:
                cur_cum_this_trial = 1
            #gtdf['cum_wei_this_trial'] = cur_cum_this_trial
            row['cum_wei_this_trial'] = cur_cum_this_trial
            if cur_state == 'head':
                row['head_trial'] = cur_head_trial
            elif cur_state == 'thorax':
                row['thorax_trial'] = cur_thorax_trial
            gtdf.iloc[rowi] = row

    fig_cum = plt.figure('cum indx: %s' % label, figsize=(6, 5))
    ax_cum = fig_cum.add_subplot(111)
    pulse_nums = [1, 2, 3]
    this_data = {}
    head_data = []
    thorax_data = []

    sdx_e = []
    sdx_g = []
    sdx_c = []

    for pulse_num in pulse_nums:
        head_pulse_df = gtdf[gtdf['head_trial'] == pulse_num]
        thorax_pulse_df = gtdf[gtdf['thorax_trial'] == pulse_num]

        h = _do_group(head_pulse_df)
        t = _do_group(thorax_pulse_df)
        sdx_e, sdx_g, sdx_c = _update_latencies(head_pulse_df, sdx_e, sdx_g,
                                                sdx_c, 'head')
        sdx_e, sdx_g, sdx_c = _update_latencies(thorax_pulse_df, sdx_e, sdx_g,
                                                sdx_c, 'thorax')
        this_data['head%d' % pulse_num] = h
        this_data['thorax%d' % pulse_num] = t
        head_data.append(h)
        thorax_data.append(t)
        plot_cum(
            ax_cum,
            h,  #label='head %d'%pulse_num,
            lw=0.5,
            color=COLORS[HEAD])
        plot_cum(
            ax_cum,
            t,  #label='thorax %d'%pulse_num,
            lw=0.5,
            color=COLORS[THORAX])
    all_head_data = combine_cum_data(head_data)
    all_thorax_data = combine_cum_data(thorax_data)
    plot_cum(ax_cum, all_head_data, label='head', lw=2, color=COLORS[HEAD])
    plot_cum(ax_cum,
             all_thorax_data,
             label='thorax',
             lw=2,
             color=COLORS[THORAX])
    ax_cum.legend()

    sdx_e = np.array(sdx_e)
    sdx_g = np.array(sdx_g)
    sdx_c = np.array(sdx_c)

    buf = ''
    alpha = 0.95
    try:
        S, P, T = pairwise_logrank_test(sdx_e, sdx_g, sdx_c, alpha=alpha)
    except np.linalg.LinAlgError:
        buf += 'numerical errors computing logrank test'
    else:
        buf += '<h1>%s</h1>\n' % label
        buf += '<h2>pairwise logrank test</h2>\n'
        buf += '  analyses done using the <a href="http://lifelines.readthedocs.org">lifelines</a> library\n'
        buf += P._repr_html_()
        buf += '<h3>significant at alpha=%s?</h3>\n' % alpha
        buf += T._repr_html_()

    n_head = len([x for x in sdx_g if x == 'head'])
    n_thorax = len([x for x in sdx_g if x == 'thorax'])
    p_value = P['head']['thorax']

    return {
        'fig': fig_cum,
        'ax': ax_cum,
        'n_head': n_head,
        'n_thorax': n_thorax,
        'p_value': p_value,
        'buf': buf,
    }
Example 13
                #kmf.fit(latency_arr, censorship=C, label=name_key )
                for i in range(len(C)):
                    sdx_e.append(latency_arr[i])
                    sdx_g.append(name_key)
                    sdx_c.append(C[i])

            # --- stats --------------------------------

            sdx_e = np.array(sdx_e)
            sdx_g = np.array(sdx_g)
            sdx_c = np.array(sdx_c)

            alpha = 0.95
            try:
                S, P, T = pairwise_logrank_test(sdx_e,
                                                sdx_g,
                                                sdx_c,
                                                alpha=alpha)
            except np.linalg.LinAlgError:
                buf += 'numerical errors computing logrank test'
            else:
                buf += '<h2>pairwise logrank test</h2>\n'
                buf += '  analyses done using the <a href="http://lifelines.readthedocs.org">lifelines</a> library\n'
                buf += P._repr_html_()
                buf += '<h3>significant at alpha=%s?</h3>\n' % alpha
                buf += T._repr_html_()
            '''
            clipped = df_all.copy()
            clipped['latency'] = clipped['latency'].clip(upper=MAX_LATENCY)
            group_info = label_homogeneous_groups_pandas( clipped,
                                                          groupby_column_name='name_key',
                                                          value_column_name='latency')
Example 14
def test_pairwise_waltons_dataset_is_significantly_different():
    waltons_dataset = load_waltons()
    R = stats.pairwise_logrank_test(waltons_dataset["T"],
                                    waltons_dataset["group"])
    assert R.values[0, 1].p_value < 0.05
Example 15
    kmf.fit(T[ix], E[ix], label=each)
    if i == 0:
        ax = kmf.plot(ci_show=False)

    else:
        ax = kmf.plot(ax=ax, ci_show=False)
ax.set_title(r"Survival Curves for Different Subjectwise")
# Log rank Test for differences in survival curves
from lifelines import statistics
df_survival_test = pandas.DataFrame({
    'durations': T,
    'events': E,
    'groups': groups
})
result = statistics.pairwise_logrank_test(df_survival_test['durations'],
                                          df_survival_test['groups'],
                                          df_survival_test['events'])

result.test_statistic
result.p_value
result.print_summary()

# Given that math and science are significantly different, we need to see whether the
# features of math and science teachers differ in the EL survey data.
# Codes:
# teacher_gender: male - 1, female - 2
# edu_qual: Phd, Mphil, DoublePG, PG, Grad, Sec, HrSec
# qual_bechalore: Indian_languages, eng, math, sci, soc_stud, other, NA
# qual_master: Indian_languages, eng, math, sci, soc_stud, other, NA
# teacher_profqual: MEd, BEd, DEd, other, no_prof_qual
# teacher_sub: indian_languages, eng, math, sci, soc_stud, other, NA
Example 17
def KM_all_vars(features, outcomes):
    T = outcomes['duration_mortality']
    E = outcomes['event_mortality']

    is_binary = features.columns[['ja' in f for f in features.columns]]
    is_binary = is_binary.append(
        features.columns[['positief' in f for f in features.columns]])
    is_binary = is_binary.append(features.columns[[
        'Andere infectieuze ademhalingsdiagnose' in f for f in features.columns
    ]])
    is_binary = is_binary.append(features.columns[[
        'oxygen_saturation_on_cat_' in f for f in features.columns
    ]])
    is_float = [
        'ALAT (U/L)', 'ASAT (U/L)', 'Ureum (mmol/L)', 'Albumine (g/L)',
        'Ca (totaal) (mmol/L)', 'Creatinine (µmol/L)', 'glucose (mmol/L)',
        'Hb (mmol/L)', 'hart frequentie (1/min)', 'LDH (U/L)',
        'Lactaat (mmol/L)', 'Lymfocyten (x10^9/L)', 'Neutrofielen (x10^9/L)',
        'pCO2 (kPa)', 'Zuurgraad (pH)', 'PaO2 arteriëel (kPa)',
        'trombocyten (x10^9/L)', 'Kalium (mmol/L)', 'SaO2 (%)',
        'Natrium (mmol/L)', 'Temperatuur (ºC)', 'totaal Bilirubine (IE)',
        'Leukocyten (x10^3/µL)', 'CK (U/L)', 'CRP (mg/L)',
        'Tijd sinds eerste klachten (dagen)', 'Diastolische bloeddruk (mmHg)',
        'ferritine (mg/L)', 'FiO2 (%)', 'Zuurstof saturatie (%)',
        'Ademhalingsfrequentie (1/min)', 'Systolische bloeddruk (mmHg)',
        'Aantal thuismedicamenten'
    ]
    is_categorical_corads = [
        'corads_admission_cat_1', 'corads_admission_cat_2',
        'corads_admission_cat_3', 'corads_admission_cat_4',
        'corads_admission_cat_5'
    ]
    is_categorical_male_female = ['gender_cat_1', 'gender_cat_2']
    ngroups = 3
    statres = []
    for f in features.columns:
        print(f)
        kmfs = None
        fig, axes = plt.subplots(1, 1)
        features[f] = features[f].astype(float)
        if f in is_binary:
            yes = features[f] == 1
            if sum(yes) > 0:
                kmf_yes = KaplanMeierFitter().fit(T[yes],
                                                  E[yes],
                                                  label='Ja (n={})'.format(
                                                      np.nansum(yes)))

            no = features[f] == 0
            if sum(no) > 0:
                kmf_no = KaplanMeierFitter().fit(T[no],
                                                 E[no],
                                                 label='Nee (n={})'.format(
                                                     np.nansum(no)))

            na = features[f].isna()
            if sum(na) > 0:
                kmf_na = KaplanMeierFitter().fit(
                    T[na],
                    E[na],
                    label='Onbekend (n={})'.format(np.nansum(na)))

            kmfs = [kmf_yes, kmf_no, kmf_na]
            # event_durations (iterable) – a (n,) list-like representing the (possibly partial) durations of all individuals
            # groups (iterable) – a (n,) list-like of unique group labels for each individual.
            # event_observed (iterable, optional) – a (n,) list-like of event_observed events: 1 if observed death, 0 if censored. Defaults to all observed.
            # t_0 (float, optional (default=-1)) – the period under observation, -1 for all time.
            groups = features[f].astype("category")
            event_durations = T
            event_observed = E

        elif f in is_float:
            ff = pd.qcut(features[f], ngroups, labels=np.arange(ngroups) + 1)
            kmfs = []
            for q in ff.unique():
                sel = ff == q
                if sum(sel) > 0:
                    kmf_q = KaplanMeierFitter().fit(T[sel],
                                                    E[sel],
                                                    label='{} (n={})'.format(
                                                        q, np.nansum(sel)))
                    kmfs += [kmf_q]
            sel = ff.isna()
            if sum(sel) > 0:
                q = 'Onbekend'
                kmf_q = KaplanMeierFitter().fit(T[sel],
                                                E[sel],
                                                label='{} (n={})'.format(
                                                    q, np.nansum(sel)))
                kmfs += [kmf_q]
            groups = ff
            event_durations = T
            event_observed = E

        elif f in is_categorical_corads:
            kmfs = []
            for f in is_categorical_corads:
                sel = features[f]
                kmf_f = KaplanMeierFitter().fit(T[sel],
                                                E[sel],
                                                label='{} (n={})'.format(
                                                    f, np.nansum(sel)))
                kmfs += [kmf_f]
            na = ~features[is_categorical_corads].any(axis=1)
            if sum(na) > 0:
                kmf_na = KaplanMeierFitter().fit(
                    T[na],
                    E[na],
                    label='Onbekend (n={})'.format(np.nansum(na)))
            groups = is_categorical_corads  #.astype("category")
            event_durations = T
            event_observed = E

            statres += [None]  # FIXME
            continue  # FIXME

        elif f in is_categorical_male_female:
            kmfs = []
            for f in is_categorical_male_female:
                sel = features[f] == 1.0
                kmf_f = KaplanMeierFitter().fit(T[sel],
                                                E[sel],
                                                label='{} (n={})'.format(
                                                    f, np.nansum(sel)))
                kmfs += [kmf_f]
            na = ~features[is_categorical_male_female].any(axis=1)
            if sum(na) > 0:
                kmf_na = KaplanMeierFitter().fit(
                    T[na],
                    E[na],
                    label='Onbekend (n={})'.format(np.nansum(na)))
            groups = is_categorical_male_female  #.astype("category")
            event_durations = T
            event_observed = E

            statres += [None]  # FIXME
            continue  # FIXME

        else:
            # split data in X groups
            print('{} not implemented'.format(f))

        groups = groups.cat.add_categories(-1).fillna(
            -1)  # treat NaN as a separate group.

        if len(np.unique(groups)) < 5:
            ss = [
                pairwise_logrank_test(event_durations,
                                      groups,
                                      event_observed,
                                      t_0=21.)
            ]
            p = np.min([s.p_value for s in ss])
            print(p)
            if p * 289 < 0.05:
                print('{} < 0.05 (p: {}, corrected p: {})'.format(f, p, p * 289))
            statres += ss
        else:
            print('error in groups: {} - {}'.format(f, groups))

        if kmfs:
            [k.plot_survival_function() for k in kmfs]

            axes.set_xticks([1, 5, 9, 13, 17, 21])
            axes.set_xticklabels(['1', '5', '9', '13', '17', '21'])
            axes.set_xlabel('Aantal dagen sinds opnamedag')
            axes.set_ylabel('Proportie overlevend')

            axes.set_xlim(0, 21)
            axes.set_ylim(0, 1)
        plt.title('Kaplan-Meier survival - {}'.format(f))
        plt.tight_layout()

        filename = clean_filename('KM_{}.png'.format(f))
        plt.show()
        fig.savefig(os.path.join('km_curves', filename),
                    format='png',
                    dpi=300,
                    figsize=(20, 20),
                    pad_inches=0,
                    bbox_inches='tight')

    return statres
Example 18
def generate_pdf(bioassay_name_pattern):
    # Mod by Aubrey Moore 2019-04-17.
    # The following sql was modified to solve a problem.
    # The modified sql selects only records where the full bioassay name is matched.
    # Previously, a search for 'PNG' matched [PNG-1, PNG-2, PNG-3, PNGperOS-1, PNGperOS-2, PNGperOS-3]
    # Now, a search for 'PNG' matches only [PNG-1, PNG-2, PNG-3].
    #sql = "SELECT * FROM btl WHERE bioassay_name LIKE '%{}%'".format(bioassay_name)

    bioassay_name = bioassay_name_pattern.replace('%', '')

    sql = "SELECT * FROM btl WHERE bioassay_name LIKE '{}'".format(
        bioassay_name_pattern)
    rows = db.executesql(sql, as_dict=True)
    df = pd.DataFrame(rows)
    print(df.info())
    t, e = lifelines.utils.datetimes_to_durations(
        start_times=pd.to_datetime(df.date_start_bioassay),
        end_times=pd.to_datetime(df.date_died),
        fill_date=pd.to_datetime(df.date_end_bioassay))
    print(t)
    df['t'] = t
    df['e'] = e

    # Create survivorship plot
    fig, ax = plt.subplots(figsize=(18, 6))
    kmf = KaplanMeierFitter()
    for name, grouped_df in df.groupby('bioassay_treatment'):
        kmf.fit(grouped_df['t'], grouped_df['e'], label=name)
        kmf.plot(ax=ax, linewidth=5, ci_show=False)
    ax.set_xlabel('days after treatment')
    ax.set_ylabel('proportion dead')
    ax.set_ylim([0, 1])
    fig.savefig('survivorshipfig.pdf')

    # Create mortality-table-tex
    results = statistics.pairwise_logrank_test(df['t'],
                                               df['bioassay_treatment'],
                                               df['e'])
    s = r'''
        \begin{table}[h!]
        \centering
        \caption{Pairwise differences among mortality curves.}
    '''
    s += results.summary.to_latex()
    s += r'\end{table}'
    mortality_table_tex = s
    print(s)

    # Create document
    s = r'''
        \documentclass[11pt]{scrartcl}
        \usepackage{textcomp}
        \usepackage{gensymb}
        \usepackage{graphicx}
        \usepackage{grffile} %required because there are multiple dot characters in my file names
        \usepackage{booktabs}
        \usepackage[letterpaper, margin=1in]{geometry}
        
        \titlehead{\centering\includegraphics[width=0.75in]{applications/rearing/static/images/crb_logo.png}\\
        	University of Guam Coconut Rhinoceros Beetle Biological Control Project\\
        	Bioassay Report generated by CRB Rearing Database v.20190317\\
        	https://aubreymoore.pythonanywhere.com/rearing}
        \title{---title---}
        \author{Aubrey Moore and Jim Grasela\\University of Guam Coconut Rhinoceros Beetle Biocontrol Project}

\begin{document}

    \begin{titlepage}
        \maketitle
		\tableofcontents
	\end{titlepage}

        ---description---
        
        \clearpage
        \section{Mortality}
        \includegraphics[width=\textwidth]{survivorshipfig.pdf}
        ---mortality-table-tex---
        
        \clearpage
        \section{Mass}
        ---mass plots---
        
        \clearpage
        \section{Postmortem Images}
        ---postmortem images---
        
        \end{document}
    '''
    s = s.replace('---title---', tex_escape(bioassay_name))
    s = s.replace('---mortality-table-tex---', mortality_table_tex)

    # Replace ---description--- with tex from database
    sql = 'select tex from bioassay where name="{}"'.format(
        bioassay_name_pattern)
    logger.debug(sql)
    rows = db.executesql(sql)
    if rows:
        logger.debug(rows[0])
        description = rows[0][0]
        description = description.encode('utf8')
        logger.debug('type: {}   description: {}'.format(
            type(description), description))
        s = s.replace('---description---', description)
    else:
        s = s.replace('---description---', '')

    #Replace ---mass plots---
    file_list = plot_mass_by_treatment(bioassay_name_pattern)
    logger.debug('file_list: {}'.format(file_list))
    tex = ''
    for file_name in file_list:
        tex += r'\subsection*{' + file_name.replace('-', ' ') + r'}'
        tex += '\n'
        tex += r'\includegraphics[width=\textwidth]{' + file_name + r'}'
        tex += '\n'
    s = s.replace('---mass plots---', tex)
    if False:
        #Replace ---postmortem images---
        tex = get_postmortem_images(bioassay_name_pattern)
        s = s.replace('---postmortem images---', tex)

    #s = tex_escape(s)
    logger.debug(s)

    # Generate PDF
    with open('report.tex', "w") as f:
        f.write(s)
    result = subprocess.call(['pdflatex', 'report.tex'])
    result = subprocess.call(['pdflatex', 'report.tex'])
    return response.stream('report.pdf')
Example 19

plt.rcParams['figure.figsize'] = [12, 5]

kmf.plot();


print(dfCurvas.sexo.value_counts())
curvaSobrevivencia(dfCurvas,'sexo')


results=multivariate_logrank_test(event_durations=T,groups=dfCurvas.sexo,event_observed=C)
results.print_summary()


results=pairwise_logrank_test(event_durations=T,groups=dfCurvas.sexo,event_observed=C)
results.print_summary()


dfCurvas.mesesUP.describe()


var='mesesUP'
varEscalao='escMesesUP'
dfCurvas[varEscalao]=''
for index, cliente in dfCurvas.iterrows():
    # if the variable has the value 1, put the activity description in the new variable
    if cliente[var] <= 2: 
        dfCurvas.at[index,varEscalao]=var+' less than 2'
    elif (cliente[var] > 2) & (cliente[var] <= 4):
        dfCurvas.at[index,varEscalao]=var+' greater than 2 and less than 4'
Example 20
def test_pairwise_waltons_dataset_is_significantly_different():
    waltons_dataset = load_waltons()
    R = stats.pairwise_logrank_test(waltons_dataset["T"], waltons_dataset["group"])
    assert R.summary.loc[("control", "miR-137")]["p"] < 0.05
Example 22
def plot3(df):
    import sys
    #get_ipython().system('{sys.executable} -m pip install lifelines')

    #install pandas and matlab plot

    import pandas as pd
    import matplotlib.pyplot as plt

    from lifelines import KaplanMeierFitter

    # import os
    # os.chdir("/Users/MDONEGAN/Downloads")

    #survival= pd.read_csv("/Users/MDONEGAN/Downloads/Book2.csv", sep=',')
    survival = df

    from lifelines.statistics import pairwise_logrank_test

    results = pairwise_logrank_test(survival['time'], survival['group'],
                                    survival['event'])

    results.print_summary()

    #%%
    # this util converts a table with "death" and "censored" (alive) into  the lifelines format

    from lifelines import KaplanMeierFitter
    from lifelines.utils import survival_events_from_table

    kmf = KaplanMeierFitter()
    ax = plt.subplot(111)

    #df = pd.read_csv('/Users/MDONEGAN/Downloads/counts.csv')
    df = df.set_index('time')

    T, E, W = survival_events_from_table(df,
                                         observed_deaths_col='death',
                                         censored_col='censored')

    kmf.fit(T, E, weights=W)

    kmf.plot(ax=ax, ci_show=True, marker='o')
    plt.xlabel("days")
    plt.ylabel("survival %")
    plt.ylim(0.4, 1.05)

    #%%
    #trying to combine the grouping function and the events from table function

    from lifelines import KaplanMeierFitter
    from lifelines.utils import survival_events_from_table

    kmf = KaplanMeierFitter()
    ax = plt.subplot(111)

    #df = pd.read_csv('/Users/MDONEGAN/Downloads/counts.csv')
    # df is already indexed by 'time' from the block above, so no need to set the index again

    T, E, W = survival_events_from_table(df,
                                         observed_deaths_col='death',
                                         censored_col='censored')

    print(E)

    # group the raw survival data by treatment and plot one fitted curve per group
    for name, grouped_survival in survival.groupby('group'):
        kmf.fit(grouped_survival['time'], grouped_survival['event'], label=name)
        kmf.plot(ax=ax, ci_show=False, marker='o')
        plt.xlabel("days")
        plt.ylabel("survival %")
        plt.ylim(0.4, 1.05)

    return fig_to_uri(plt)