Beispiel #1
0
def main():
    """Draw a 2x2 figure with four views of the data in 'mystery0.dat'.

    Panels, in order: the PMF of the raw values, the PMF of the binned
    values, the PMF made from thinkstats2.EstimatedPdf, and the CDF.
    """
    sample = read_file('mystery0.dat')
    lo, hi = min(sample), max(sample)

    # distributions built straight from the raw values
    raw_pmf = thinkstats2.MakePmfFromList(sample)
    raw_cdf = thinkstats2.MakeCdfFromList(sample)

    # estimated density evaluated on 101 evenly spaced points in [lo, hi]
    grid = numpy.linspace(lo, hi, 101)
    smooth_pmf = thinkstats2.EstimatedPdf(sample).MakePmf(grid)

    # PMF of the values returned by BinData(sample, lo, hi, 51)
    binned_pmf = thinkstats2.MakePmfFromList(BinData(sample, lo, hi, 51))

    # (plot function, distribution, extra plot options, panel title)
    panels = [
        (thinkplot.Hist, raw_pmf, {'width': 0.1}, 'Naive Pmf'),
        (thinkplot.Hist, binned_pmf, {}, 'Binned Hist'),
        (thinkplot.Pmf, smooth_pmf, {}, 'KDE PDF'),
        (thinkplot.Cdf, raw_cdf, {}, 'CDF'),
    ]

    for position, (draw, dist, options, title) in enumerate(panels, 1):
        thinkplot.SubPlot(2, 2, position)
        draw(dist, **options)
        thinkplot.Config(title=title)

    thinkplot.Show()
Beispiel #2
0
def EstimateHazardFuncion(past, current):
    """Estimates the hazard function by Kaplan-Meier.

    http://en.wikipedia.org/wiki/Kaplan%E2%80%93Meier_estimator

    past: list of durations for complete pregnancies
    current: list of durations for current pregnancies

    returns: the (t, hazard) pairs transposed with zip, i.e. the times
        followed by the matching hazard values
    """
    num_past = len(past)
    num_current = len(current)

    # fraction of the complete pregnancies that ended at each duration
    ended_pmf = thinkstats2.MakePmfFromList(past)

    # times and survival values derived from the complete pregnancies
    ts, ss = SurvivalFunction(thinkstats2.MakeCdfFromList(past))

    # CDF of the durations reached so far by the current pregnancies
    current_cdf = thinkstats2.MakeCdfFromList(current)

    def hazard_at(t, s):
        # cases that ended at t, divided by ended plus still-ongoing cases
        ended = num_past * ended_pmf.Prob(t)
        still_going = (num_past * s +
                       num_current * (1 - current_cdf.Prob(t)))
        return ended / (ended + still_going)

    pairs = [(t, hazard_at(t, s)) for t, s in zip(ts, ss)]
    return zip(*pairs)
Beispiel #3
0
def main():
    """Show a histogram of the PMF of the speeds from ReadResults."""
    speeds = GetSpeeds(ReadResults())

    # the speeds are used as they come; no BinData step is applied here
    speed_pmf = thinkstats2.MakePmfFromList(speeds, 'speeds')
    thinkplot.Hist(speed_pmf)

    labels = dict(title='PMF of running speed',
                  xlabel='speed (mph)',
                  ylabel='probability')
    thinkplot.Show(**labels)
Beispiel #4
0
def main():
    """Print the mean and variance of the speed PMF, then plot it.

    The speeds come from GetSpeeds(ReadResults()) and are used unbinned.
    """
    results = ReadResults()
    speeds = GetSpeeds(results)

    pmf = thinkstats2.MakePmfFromList(speeds, 'speeds')
    mean = pmf.Mean()
    var = pmf.Var()

    # A single pre-formatted argument prints the same text whether print
    # is the Python 2 statement or the Python 3 function; the bare
    # statement form is a SyntaxError on Python 3.
    print('mean= %s' % (mean,))
    print('var= %s' % (var,))

    thinkplot.Hist(pmf)
    thinkplot.Show(title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='probability')
Beispiel #5
0
def main():
    """Plot the PMF that BiasPmf returns for an observer speed of 7.5.

    The speeds are first passed through BinData(speeds, 3, 12, 50).
    """
    observer_speed = 7.5

    binned = BinData(GetSpeeds(ReadResults()), 3, 12, 50)
    speed_pmf = thinkstats2.MakePmfFromList(binned, 'speeds')

    # distribution as returned by BiasPmf for the observer speed
    seen_pmf = BiasPmf(speed_pmf, observer_speed)

    thinkplot.Hist(seen_pmf)

    labels = dict(
        title='PMF of running speed (relative to observer speed = 7.5 MPH)',
        xlabel='speed (mph)',
        ylabel='probability')
    thinkplot.Show(**labels)
Beispiel #6
0
def PlotConditionalSurvival(durations):
    """Plots conditional survival curves for a range of t0.

    For each t0 in (8, 16, 24, 32) the curve returned by
    ConditionalSurvival is plotted, and t0 is printed together with the
    mean of the matching DurationCdf.

    durations: list of durations
    """
    pmf = thinkstats2.MakePmfFromList(durations)

    times = [8, 16, 24, 32]
    thinkplot.PrePlot(len(times))

    for t0 in times:
        ts, ss = ConditionalSurvival(pmf, t0)
        thinkplot.Plot(ts, ss, label='t0=%d' % t0)

        duration_cdf = DurationCdf(ts, ss)
        # A single pre-formatted argument prints the same text whether
        # print is the Python 2 statement or the Python 3 function; the
        # bare statement form is a SyntaxError on Python 3.
        print('%s %s' % (t0, duration_cdf.Mean()))

    thinkplot.Show()
Beispiel #7
0
def Process(table, name):
    """Runs cumulative.Process, then adds age and weight summaries.

    Sets these attributes on table:
        ages: agepreg of every record whose agepreg is not 'NA'
        age_pmf: result of MakePmfFromList on ages
        age_cdf: result of MakeCdfFromList on ages
        weights: totalwgt_oz of every record whose totalwgt_oz is not 'NA'
        weight_cdf: result of MakeCdfFromList on weights

    table: object with a records sequence; its name attribute is passed
        as the label of each distribution
    name: passed through to cumulative.Process
    """
    cumulative.Process(table, name)

    def reported(field):
        # values of `field` across the records, skipping 'NA' entries
        return [getattr(rec, field) for rec in table.records
                if getattr(rec, field) != 'NA']

    ages = reported('agepreg')
    table.ages = ages
    table.age_pmf = thinkstats2.MakePmfFromList(ages, table.name)
    table.age_cdf = thinkstats2.MakeCdfFromList(ages, table.name)

    weights = reported('totalwgt_oz')
    table.weights = weights
    table.weight_cdf = thinkstats2.MakeCdfFromList(weights, table.name)
Beispiel #8
0
def main():
    """Save a histogram and a CDF plot of the biased speed distribution.

    Calls thinkplot.Save with root 'observed_speeds' for the histogram
    and root 'observed_speeds_cdf' for the CDF.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())

    # distribution of the speeds as read, and the version BiasPmf returns
    # for an observer speed of 7.5
    actual = thinkstats2.MakePmfFromList(speeds, 'actual speeds')
    observed = BiasPmf(actual, 7.5, name='observed speeds')

    # first figure: histogram of the biased PMF
    thinkplot.Hist(observed)
    thinkplot.Save(root='observed_speeds',
                   title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='probability')

    # second figure: the same distribution as a CDF, after clearing
    observed_cdf = thinkstats2.MakeCdfFromPmf(observed)
    thinkplot.Clf()
    thinkplot.Cdf(observed_cdf)
    thinkplot.Save(root='observed_speeds_cdf',
                   title='CDF of running speed',
                   xlabel='speed (mph)',
                   ylabel='cumulative probability')
Beispiel #9
0
    Inputs:
        
        pmf:  Probability Mass Function Object of lifetimes
        age: age (a integer in years)
    
    Outputs: 
        
        
    """
    
    copy = pmf.Copy()
    
    # leave remaining lifetime years (remove ages younger than current) 
    for val in copy.Values():
        if val <= age:
            pmf.Remove(val)

    pmf.Normalize()
    
    return pmf

# Small demo, executed only when the file is run as a script.
if __name__ == '__main__':    
    # Pmf of six sample values; the value 2 appears twice
    pmf = thinkstats2.MakePmfFromList([1, 2, 2, 3, 4, 5])
    # NOTE(review): the function body visible just above removes values
    # <= age from the Pmf it receives and returns that same object;
    # presumably that body is RemainingLifetimes, in which case rl_pmf and
    # pmf are the same object after this call -- confirm.
    rl_pmf = RemainingLifetimes(pmf, 2)
    # NOTE(review): this only references pyplot.bar without calling it, so
    # nothing is drawn -- presumably an unfinished plotting call.
    pyplot.bar
    
        
    
    
    
Beispiel #10
0
import thinkstats2


def PmfMean(pmf):
    """Return the sum of weight * value over the pairs in pmf.Items().

    pmf: prob mass function object; Items() yields (value, weight) pairs
    """
    return sum(weight * value for value, weight in pmf.Items())


# Demo: print the mean of the Pmf built from a small list of values.
v = [1, 1, 2, 3, 4]
pmf = thinkstats2.MakePmfFromList(v)
# A single pre-formatted argument prints the same text whether print is
# the Python 2 statement or the Python 3 function; the bare statement form
# is a SyntaxError on Python 3. The two spaces after "=" reproduce the
# separator the original comma-separated print inserted.
print("Mean =  %s" % (PmfMean(pmf),))