Python transform_data Exemples, data_operations.transform_data Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : example_bladder.py Projet : majuvi/PyMCF

    plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs, print_data

if __name__ == '__main__':
    # Bladder example: load the raw records, normalise column names and
    # codes, then compute MCF estimates overall and per treatment group.

    # Alternative input file: 'datas/bladder_recurrent.csv'.
    raw_data = pd.read_csv('datas/T45.csv', sep=';')
    # Rename the dataset's columns to 'Sample', 'Number', 'Event' and 'Drug'.
    # The original 'Event' column is kept as 'Number'; 'Censored' becomes the
    # new 'Event' column.
    raw_data.rename(columns={'Patient number': 'Sample', 'Event': 'Number', 'Censored': 'Event',
                             'Treatment group': 'Drug'}, inplace=True)
    # Replace the numeric treatment codes with readable labels.
    raw_data['Drug'] = raw_data['Drug'].map({1: 'Placebo', 2: 'Pyridoxine', 3: 'Thiotepa'})
    # 'Event' currently holds the former 'Censored' flag; invert it so that
    # it marks events instead (assumes 0/1 coding -- TODO confirm).
    raw_data['Event'] = 1 - raw_data['Event']

    tb = print_data(raw_data)
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print(tb)

    # Overall estimate on the transformed data.
    df = transform_data(raw_data)
    lt = mcf(df, robust=True, positive=False)

    covariate = 'Drug'
    cohort1, cohort2 = 'Placebo', 'Thiotepa'

    # Index the data by the covariate and compute one mcf() result per group.
    group_df = df.set_index(covariate)
    group_lt = df.groupby(covariate).apply(lambda sfr: mcf(sfr, robust=True, positive=False))

    # Label-based selection of the two cohorts. `.loc` replaces `.ix`, which
    # was deprecated in pandas 0.20 and removed in pandas 1.0.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]

    # Difference between the two cohorts' mcf() results.
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification
    fig, (ax1, ax2) = plt.subplots(2, 1)

Exemple #2

0

Afficher le fichier

            rows.append((i, t_ij, 1, cost))
            t_ij += expon.rvs(scale=1. / rate)
        rows.append((i, t_i, 0, np.nan))
    return pd.DataFrame(rows, columns=['Sample', 'Time', 'Event', 'Cost'])


if __name__ == '__main__':
    # Script entry point: build the example data, compute the MCF tables and
    # plot both the data and the resulting curves.

    # Cook (2008, pg. 299) Example 8.1: Field Repair data
    #   This dataset (see Appendix D) gives simulated data on unscheduled repairs
    #   for a fleet of m = 134 large utility vehicles operated by a city. The data were
    #   collected over a three-year period on new vehicles which were purchased and
    #   placed in service over the first two years of the study. Time is measured in
    #   years from the start of the study, and costs are in hundreds of dollars.
    raw_data = generate_data()
    df = transform_data(raw_data)

    # Table D.4.
    # NOTE(review): no code follows this heading in the visible excerpt.

    # Marked point process (8.14) as mcf1 and cumulative cost process (8.12) as mcf2
    # lt1 is the plain mcf() result; it is passed to mcfcost() as `mcf_compound`
    # for mcf1, whereas mcf2 is computed from the data alone.
    lt1 = mcf(df, robust=True, positive=False)
    mcf1 = mcfcost(df, mcf_compound=lt1, robust=False, positive=False)
    mcf2 = mcfcost(df, robust=True, positive=False)

    # Plot data
    plot_data(df, alpha=0.5, marker='.', plot_costs=False)

    # Plot MCFs
    # Two side-by-side axes sharing the x axis but with independent y axes.
    fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=False)
    plot_mcf(mcf1, ax=ax1, cost=True, label='Events * Cost')
    plot_mcf(mcf2, ax=ax2, cost=True, label='Cost')

Exemple #3

0

Afficher le fichier

Fichier : demo_bladder.py Projet : majuvi/PyMCF

    # Rename the dataset's columns to 'Sample', 'Number', 'Event' and 'Drug'.
    # The original 'Event' column is kept as 'Number'; 'Censored' becomes the
    # new 'Event' column.
    raw_data.rename(columns={
        'Patient number': 'Sample',
        'Event': 'Number',
        'Censored': 'Event',
        'Treatment group': 'Drug'
    },
                    inplace=True)
    # Replace the numeric treatment codes with readable labels.
    raw_data['Drug'] = raw_data['Drug'].map({
        1: 'Placebo',
        2: 'Pyridoxine',
        3: 'Thiotepa'
    })
    # 'Event' currently holds the former 'Censored' flag; invert it so that
    # it marks events instead (assumes 0/1 coding -- TODO confirm).
    raw_data['Event'] = 1 - raw_data['Event']
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print(raw_data.head())

    # Overall estimate on the transformed data.
    df = transform_data(raw_data)
    lt = mcf(df, robust=True, positive=False)

    covariate = 'Drug'
    cohort1, cohort2 = 'Placebo', 'Thiotepa'

    # Index the data by the covariate and compute one mcf() result per group.
    group_df = df.set_index(covariate)
    group_lt = df.groupby(covariate).apply(
        lambda sfr: mcf(sfr, robust=True, positive=False))

    # Label-based selection of the two cohorts. `.loc` replaces `.ix`, which
    # was deprecated in pandas 0.20 and removed in pandas 1.0.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]

    # Difference between the two cohorts' mcf() results.
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification

Exemple #4

0

Afficher le fichier

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_operations import transform_data, mcf, mcfdiff, mcfequal,\
    plot_data, plot_datas, plot_mcf, plot_mcfs, plot_mcfdiff, plot_mcfdiffs

if __name__ == '__main__':
    # Transmission example: load recurrent-event records, compute MCF
    # estimates overall and per transmission type, then start plotting.

    data = pd.read_csv('datas/transmission_recurrent.csv', sep=';')

    df = transform_data(data, format='recurrent')
    lt = mcf(df, robust=True, positive=False)
    # Show the first rows of the estimate and its confidence-limit columns.
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print(lt[['Time', 'Y', 'N', 'dE[N]', 'E[N]', 'E[N].lcl',
              'E[N].ucl']].head())

    covariate = 'Transmission'
    cohort1, cohort2 = 'Automatic', 'Manual'

    # Index the data by the covariate and compute one mcf() result per group.
    # Note the per-group call uses positive=True, unlike the overall one.
    group_df = df.set_index(covariate)
    group_lt = df.groupby(covariate).apply(
        lambda sfr: mcf(sfr, robust=True, positive=True))  # optionally: interval=True

    # Label-based selection of the two cohorts. `.loc` replaces `.ix`, which
    # was deprecated in pandas 0.20 and removed in pandas 1.0.
    df1, df2 = group_df.loc[cohort1], group_df.loc[cohort2]
    lt1, lt2 = group_lt.loc[cohort1], group_lt.loc[cohort2]

    # Difference between the two cohorts' mcf() results.
    lt_diff = mcfdiff(lt1, lt2)

    # Plot, no stratification
    fig, (ax1, ax2) = plt.subplots(2, 1)
    plot_data(df, ax=ax1)

Exemple #5

0

Afficher le fichier

Fichier : example_monetization.py Projet : majuvi/PyMCF

            T, columns=['Sample', 'Time', 'Event', 'Type', 'Cost'])


if __name__ == '__main__':
    # Script entry point: simulate data with MarkovRecurrentTerminal, split
    # it by record type, compute cost MCFs and plot data and curves.

    # Simulation parameters.
    # NOTE(review): the time unit of max_date and the rates is not shown
    # here -- confirm against MarkovRecurrentTerminal.
    max_date = 180
    play_rate, purchase_rate, churn_rate = 0.1, 0.004, 0.02
    # Exponential distribution with scale 0.25.
    playtime_dist = stats.expon(scale=0.25)
    # Discrete distribution over the values 1, 4, 10, 20 with probabilities
    # 0.80, 0.15, 0.04, 0.01 respectively.
    purchase_dist = stats.rv_discrete(values=((1, 4, 10, 20), (0.80, 0.15,
                                                               0.04, 0.01)))

    process = MarkovRecurrentTerminal(play_rate, purchase_rate, churn_rate,
                                      playtime_dist, purchase_dist)
    # presumably 300 samples observed up to max_date -- verify against Sample()
    raw_data = process.Sample(300, max_date)

    # Each subset keeps the rows of one 'Type' (0 or 1) plus every row with
    # Event == 0 (presumably the end-of-observation records -- TODO confirm).
    df_sessions = transform_data(raw_data[(raw_data['Type'] == 0) |
                                          (raw_data['Event'] == 0)])
    df_purchases = transform_data(raw_data[(raw_data['Type'] == 1) |
                                           (raw_data['Event'] == 0)])

    mcf_sessions = mcfcost(df_sessions, robust=True, positive=True)
    mcf_purchases = mcfcost(df_purchases, robust=True, positive=True)

    # 2x2 grid: raw data on the top row, cost MCFs on the bottom row; the
    # x axis is shared, the y axes are independent.
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,
                                                 2,
                                                 sharex=True,
                                                 sharey=False)
    plot_data(df_sessions, ax=ax1, marker='.', label='Session', alpha=0.2)
    plot_data(df_purchases, ax=ax2, marker='*', label='Purchase', alpha=0.2)
    # set_ylabel is called on whatever plot_mcf returns (presumably the axes).
    plot_mcf(mcf_sessions, ax=ax3, cost=True).set_ylabel('Playtime')
    plot_mcf(mcf_purchases, ax=ax4, cost=True).set_ylabel('Lifetime Value')