コード例 #1
0
ファイル: times.py プロジェクト: jonathanlee1/SARBayes
def read_time_data(url):
    """
    Read the time of day each incident occurred at.

    Arguments:
        url: A string representing the path to the database.

    Returns:
        A `pandas` dataframe holding the queried `datetime` column plus two
        derived columns: `time` and `hour`. `time` contains Python
        `datetime.time` objects with times at midnight filtered out (most of
        these indicate a date was available, but not time). `hour` is `time`
        in hours (a float between 0 and 24, exclusive).

        `time` is derived from `Incident.datetime`.
    """
    engine, session = database.initialize(url)
    try:
        df = tabulate(session.query(Incident.datetime))
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    # The loop variables are deliberately not called `datetime`/`time`: the
    # `datetime` module is still needed below to build the midnight sentinel.
    df = df.assign(time=[moment.time() for moment in df.datetime])
    df = df[df.time != datetime.time(0)]
    df = df.assign(hour=[clock.hour + clock.minute/60 + clock.second/3600
                         for clock in df.time])

    return df
コード例 #2
0
ファイル: tests.py プロジェクト: ctwardy/SARbayes-fork
    def test_tabulation(self):
        """
        Check that tabulating a two-column query keeps the column names,
        produces as many rows as there are `Subject` records, and contains
        no `None` values.
        """
        frame = tabulate(self.session.query(Subject.survived, Subject.age))

        self.assertEqual(list(frame.columns), ['survived', 'age'])
        subject_count = self.session.query(Subject).count()
        self.assertEqual(len(frame), subject_count)

        # Every cell of every row must be populated.
        for row in frame.itertuples(index=False):
            for cell in row:
                self.assertIsNotNone(cell)
コード例 #3
0
ファイル: empirical.py プロジェクト: ctwardy/SARbayes-fork
def read_data(url):
    """
    Read incident duration, survival, and category data.

    Arguments:
        url: The database URL passed to `database.initialize`.

    Returns:
        The result of `tabulate` for a query over `Incident.total_hours`,
        `Subject.survived`, and `Group.category`.
    """
    engine, session = database.initialize(url)
    try:
        query = session.query(Incident.total_hours, Subject.survived,
                              Group.category).join(Group, Subject)
        df = tabulate(query)
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    return df
コード例 #4
0
ファイル: times.py プロジェクト: ctwardy/SARbayes-fork
def read_time_data(url):
    """
    Read the time of day each incident occurred at.

    Arguments:
        url: The database URL passed to `database.initialize`.

    Returns:
        The tabulated `Incident.datetime` query with two added columns:
        `time` (the time-of-day part of `datetime`, with rows at exactly
        midnight removed) and `hour` (`time` expressed in fractional hours).
    """
    engine, session = database.initialize(url)
    try:
        df = tabulate(session.query(Incident.datetime))
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    # The loop variables are deliberately not called `datetime`/`time`: the
    # `datetime` module is still needed below to build the midnight sentinel.
    df = df.assign(time=[moment.time() for moment in df.datetime])
    df = df[df.time != datetime.time(0)]
    df = df.assign(hour=[clock.hour + clock.minute/60 + clock.second/3600
                         for clock in df.time])

    return df
コード例 #5
0
ファイル: util.py プロジェクト: jonathanlee1/SARBayes
def read_simple_data(url, exclude_singles=False, exclude_groups=False):
    """
    Read incident duration, survival, and category data. A useful shorthand.

    Arguments:
        url: A string representing the database URL to connect to.
        exclude_singles: A boolean indicating whether the query should exclude
                         subjects from groups with exactly one member.
        exclude_groups: A boolean indicating whether the query should exclude
                        subjects from groups with more than one member.

    Returns:
        A pandas dataframe containing the lost person data. The columns include
        `total_hours`, `survived`, `category`, `days` (the incident duration in
        days, as taken from `total_hours`), and `doa` (a boolean that is `True`
        if the subject did not survive). Cases with a negative timedelta
        `Incident.total_hours` are filtered out.

    Warning:
        If `exclude_singles` is `True` or `exclude_groups` is `True`, the
        function also needs to query the size of each `Group`, which may take
        a while (perhaps a minute).
    """
    engine, session = database.initialize(url)
    try:
        columns = (Incident.total_hours, Subject.survived, Group.category,
                   Group.id)
        query = session.query(*columns).join(Group, Subject)
        df = tabulate(query)
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    helper_columns = ['id']
    if exclude_singles or exclude_groups:
        # Hack: one lookup per row to fetch the size of the subject's group.
        df['size'] = [Group.query.get(int(id_)).size for id_ in df.id]
        helper_columns.append('size')
    if exclude_singles:
        df = df[df['size'] > 1]
    if exclude_groups:
        df = df[df['size'] == 1]

    # Drop the bookkeeping columns by keyword: the positional `axis` argument
    # is no longer accepted by recent pandas, and a non-inplace drop avoids
    # mutating a filtered view of the original frame.
    df = df.drop(columns=helper_columns)

    df = df.assign(days=[total_hours.total_seconds()/3600/24
                         for total_hours in df.total_hours],
                   doa=[not survived for survived in df.survived])
    df = df[0 <= df.days]

    return df
コード例 #6
0
ファイル: bayes.py プロジェクト: ctwardy/SARbayes-fork
#!/usr/bin/env python3

import matplotlib.pyplot as plt
import numpy as np
import Orange
from pomegranate import *

import database
from database.models import Subject, Group, Incident, Weather
from database.processing import survival_rate, tabulate, export_as_orange


engine, session = database.initialize('sqlite:///../data/isrid-master.db')

# Survival outcome, group size, and average temperature per subject.
# NOTE(review): the list of three `True` values is presumably the per-column
# not-null flag accepted by `tabulate` -- confirm against its signature.
query = (session.query(Subject.survived, Group.size, Weather.avg_temp)
         .join(Group, Incident, Weather))
df = tabulate(query, [True, True, True])

database.terminate(engine, session)


# Fraction of tabulated subjects that survived
survivor_count = sum(df.survived)
print(survivor_count/len(df))
コード例 #7
0
ファイル: main.py プロジェクト: jonathanlee1/SARBayes
import pandas as pd

import database
from database.models import Incident, Group, Subject
from database.processing import tabulate


# Get data

# Path may vary based on your current working directory
engine, session = database.initialize('sqlite:///../../data/isrid-master.db')

query = (session.query(Subject.survived, Incident.total_hours,
                       Group.category, Group.id, Subject.age, Subject.sex)
         .join(Group, Incident))
df = tabulate(query)

# Bad hack: one lookup per row to fetch the size of the subject's group
df['size'] = [Group.query.get(int(group_id)).size for group_id in df.id]
# Incident duration converted from a timedelta to fractional days
df['days'] = [elapsed.total_seconds()/3600/24 for elapsed in df.total_hours]

database.terminate(engine, session)


# Build UI

# The y-axis runs from 0 to just above 1.
plot = figure(
    title='Lost Person Survival Over Time',
    plot_width=1000,
    y_range=Range1d(start=0, end=1 + 1e-3, bounds='auto'),
)

# Used for notifying the user when the constraints exclude all cases,
# rather than failing silently.
status = Paragraph()

# Create a list of checkboxes for enabling each category
コード例 #8
0
ファイル: kaplanmeier.py プロジェクト: ctwardy/SARbayes-fork
def execute():
    """
    Plot Kaplan-Meier survival curves for the most common categories.

    Reads incident duration, survival, category, and group size from the
    database, then draws a 2x2 grid with one panel per category (the four
    most common ones), each showing separate curves for subjects in groups
    (size > 1) and singles (size == 1). A second figure overlays the fitted
    curves on one axis. Both figures are saved as SVG files and shown.
    """
    matplotlib.rc("font", size=20)

    engine, session = database.initialize("sqlite:///../data/isrid-master.db")
    try:
        # Query with Group.size may take awhile, at least for Charles
        # Not sure why
        query = session.query(
            Incident.total_hours, Subject.survived, Group.category, Group.size
        ).join(Group, Subject)
        print("Tabulating query... may take awhile for unknown reasons.")
        df = tabulate(query)
        print("Done tabulating.")
        print(df.describe())
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    df = df.assign(
        days=[total_hours.total_seconds() / 3600 / 24 for total_hours in df.total_hours],
        doa=[not survived for survived in df.survived],
    )
    # Discard cases with a negative duration.
    df = df[0 <= df.days]

    rows, columns = 2, 2
    grid, axes = plt.subplots(rows, columns, figsize=(15, 10))

    categories = Counter(df.category)
    plot = 0
    kmfs = []
    options = {"show_censors": True, "censor_styles": {"marker": "|", "ms": 6}, "censor_ci_force_lines": False}

    for category, _count in categories.most_common()[: rows * columns]:
        print("Category:", category)
        ax = axes[plot // columns, plot % columns]
        df_ = df[df.category == category]
        N, Ndoa = len(df_), sum(df_.doa)
        Srate = 100 * (1 - Ndoa / N)
        # Index with ["size"]: attribute access (`df_.size`) resolves to the
        # DataFrame's own element-count property, not the queried column, so
        # it would compare a single integer instead of building a row mask.
        grp = df_[df_["size"] > 1]
        sng = df_[df_["size"] == 1]
        kmf = KaplanMeierFitter()
        # kmf.fit(df_.days, event_observed=df_.doa, label=category)
        # kmf.plot(ax=ax, ci_force_lines=True)
        kmf.fit(grp.days, event_observed=grp.doa, label=category + " Groups")
        kmf.plot(ax=ax, **options)
        kmf.fit(sng.days, event_observed=sng.doa, label=category + " Singles")
        kmf.plot(ax=ax, **options)
        # NOTE(review): the same fitter object is fitted twice, so the one
        # stored here reflects the "Singles" fit when reused further down.
        kmfs.append(kmf)

        ax.set_xlim(0, min(30, 1.05 * ax.get_xlim()[1]))
        ax.set_ylim(0, 1)
        ax.set_title("{}, N = {}, DOA = {}, {:.0f}% surv".format(category, N, Ndoa, Srate))
        ax.set_xlabel("Total Incident Time (days)")
        ax.set_ylabel("Probability of Survival")

        # ax.legend_.remove()
        # ax.grid(True)

        plot += 1

    grid.suptitle("Kaplan-Meier Survival Curves", fontsize=25)
    grid.tight_layout()
    grid.subplots_adjust(top=0.9)
    grid.savefig("../doc/figures/kaplan-meier/km-grid-large.svg", transparent=True)

    # Second figure: all stored fitters overlaid on one axis.
    combined = plt.figure(figsize=(15, 10))
    ax = combined.add_subplot(1, 1, 1)
    for kmf in kmfs[: rows * columns]:
        kmf.plot(ci_show=False, show_censors=True, censor_styles={"marker": "|", "ms": 6}, ax=ax)

    ax.set_xlim(0, 15)
    ax.set_ylim(0, 1)
    ax.set_xlabel("Total Incident Time (days)")
    ax.set_ylabel("Probability of Survival")
    ax.set_title("Kaplan-Meier Survival Curves", fontsize=25)
    ax.grid(True)
    combined.savefig("../doc/figures/kaplan-meier/km-combined-large.svg", transparent=True)

    plt.show()
コード例 #9
0
ファイル: colormap.py プロジェクト: jonathanlee1/SARBayes
def main():
    """
    Plot the profile (size and survival rate) of the most common categories.

    Subjects from the ten most common categories are binned by age into ten
    10-year bins. For each (age bin, category) cell with more than
    `min_subgroup_size` subjects, the survival rate is drawn as a color and
    the subgroup size as a text label. The figure is saved as an SVG file
    and shown.
    """
    ## Read data

    engine, session = database.initialize('sqlite:///../data/isrid-master.db')
    try:
        query = session.query(Subject.age, Group.category, Subject.survived)
        query = query.join(Group)
        df = tabulate(query)
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    ## Process subjects by category and age

    selected_categories = df.category.value_counts()[:10].index.tolist()
    df = df[df.category.isin(selected_categories)]
    age_bins = np.linspace(0, 100, 11)

    survival_rates = np.full((10, 10), np.nan, dtype=np.float64)
    subgroup_sizes = np.full((10, 10), 0, dtype=np.float64)
    min_subgroup_size = 10

    for category, group in df.groupby('category'):
        # `assign` returns a new frame instead of mutating the groupby slice.
        binned = group.assign(age_bin=np.digitize(group.age, age_bins))

        for age_index, subgroup in binned.groupby('age_bin'):
            # `np.digitize` yields 0 for ages below the first edge and
            # `len(age_bins)` for ages at or above the last edge. Neither
            # maps to a row of the 10x10 grids (0 would wrap to the last
            # row), so those subjects are left out of the profile.
            if not 1 <= age_index < len(age_bins):
                continue

            survivals = subgroup.survived.values.tolist()
            key = age_index - 1, selected_categories.index(category)

            if len(survivals) > min_subgroup_size:
                survival_rates[key] = sum(survivals)/len(survivals)
                subgroup_sizes[key] = len(survivals)

                # Debugging
                lower, upper = age_bins[age_index - 1], age_bins[age_index]
                print('{}, {} - {} years old'.format(category, int(lower),
                      int(upper)))
                print('  Survival rate: {:.3f}%'.format(
                      100*survival_rates[key]))
                print('  Number of subjects: {}'.format(
                      int(subgroup_sizes[key])))

    ## Plot survival rates and subgroup sizes

    canvas = plt.matshow(survival_rates, fignum=False, cmap='RdYlGn',
                         origin='lower')
    colorbar = plt.colorbar(canvas)
    colorbar.solids.set_edgecolor('face')
    colorbar.set_label('Survival Rate')

    # Label each cell with its subgroup size; empty cells get no text.
    x_positions = y_positions = np.arange(0, 10)
    for x in x_positions:
        for y in y_positions:
            plt.text(x, y, int(subgroup_sizes[y, x]) or '',
                     horizontalalignment='center', verticalalignment='center')

    plt.title('Lost Person Category Profiles')
    plt.ylabel('Age (years)')
    plt.xlabel('Category')

    ax = plt.gca()
    ax.xaxis.tick_bottom()
    # Builtin `int` replaces the `np.int` alias, which NumPy no longer has.
    plt.yticks(np.linspace(0, 10, 11) - 0.5, age_bins.astype(int))
    plt.xticks(x_positions, selected_categories, rotation=60)
    plt.subplots_adjust(bottom=0.2)
    plt.tight_layout()
    plt.savefig('../doc/figures/subject-data/category-profiles.svg',
                transparent=True)
    plt.show()
コード例 #10
0
ファイル: plots.py プロジェクト: jonathanlee1/SARBayes
"""
plots -- Plots of subject data
"""

import matplotlib.pyplot as plt

import database
from database.models import Subject
from database.processing import tabulate

## Fetch data

engine, session = database.initialize('sqlite:///../data/isrid-master.db')
# NOTE(review): `!= None` presumably builds a SQL "IS NOT NULL" filter on the
# column expression, so it must not be rewritten as `is not None` -- confirm.
query = (session.query(Subject.age, Subject.weight, Subject.height)
         .filter(Subject.age != None))
df = tabulate(query, not_null=False)
database.terminate(engine, session)

## Make weight vs. age plot

color = '#177788'
# Only rows with a recorded weight can be plotted.
df_filtered = df[df['weight'].notnull()]

plt.figure(1)
plt.scatter(df_filtered['age'], df_filtered['weight'], c=color, alpha=0.5)

# Both axes start at zero and leave a margin of 5 beyond the largest value.
plt.xlim(0, df_filtered['age'].max() + 5)
plt.ylim(0, df_filtered['weight'].max() + 5)

plt.title('Weight vs. Age')
plt.xlabel('Age (year)')
plt.ylabel('Weight (kg)')
plt.tight_layout()
plt.savefig('../doc/figures/subject-data/weight-vs-age-plot.svg',