Ejemplos de RandomSeed en Python, ejemplos de thinkstats2.RandomSeed en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: brfss.py Proyecto: smithb16/ThinkStats2

def MakeDF(script, nrows=1000):
    """Reads BRFSS rows, prints summaries, and returns the DataFrame.

    When exactly 1000 rows are requested, several value counts are
    compared against hard-coded expectations and a success message
    is printed.

    script: string script name, used only in the success message
    nrows: number of rows to read; converted with int()

    returns: the object returned by ReadBrfss
    """
    thinkstats2.RandomSeed(17)

    nrows = int(nrows)
    df = ReadBrfss(nrows=nrows)
    # Figure generation is currently disabled: MakeFigures(df)

    # (column, label) pairs, summarized in this order
    summaries = [
        ('htm3', 'Height (cm):'),
        ('wtkg2', 'Weight (kg):'),
        ('wtyrago', 'Weight year ago (kg):'),
    ]
    for column, label in summaries:
        Summarize(df, column, label)

    # the hard-coded expectations only apply to the 1000-row read
    if nrows != 1000:
        return df

    # (column, value, expected number of occurrences)
    expected_counts = [
        ('age', 40, 28),
        ('sex', 2, 668),
        ('wtkg2', 90.91, 49),
        ('wtyrago', 160/2.2, 49),
        ('htm3', 163, 103),
        ('finalwt', 185.870345, 13),
    ]
    for column, value, count in expected_counts:
        assert getattr(df, column).value_counts()[value] == count
    print('%s: All tests passed.' % script)

    return df

Ejemplo n.º 2

0

Mostrar archivo

def main():
    """Runs the estimation steps in a fixed order.

    Calls thinkstats2.RandomSeed(17) first, then Estimate1, Estimate2,
    Estimate3 (with m=1000) and SimulateSample.
    """
    # presumably fixes the random state so runs are repeatable -- confirm
    thinkstats2.RandomSeed(17)

    Estimate1()
    Estimate2()
    Estimate3(m=1000)
    SimulateSample()

Ejemplo n.º 3

0

Mostrar archivo

Archivo: brfss_scatter.py Proyecto: Ehsan1981/ThinkStats2

def MakeFigures():
    """Makes three scatter plots and one hexbin plot of height and weight.

    Reads BRFSS records with nrows=None, takes 5000 rows through
    SampleRows (replace=False), and plots that sample with and without
    jitter. The hexbin plot uses the full frame. The asserts pin the
    values at index 100 of each height/weight series.
    """
    thinkstats2.RandomSeed(17)

    everyone = brfss.ReadBrfss(nrows=None)
    subset = SampleRows(everyone, 5000, replace=False)

    # sample without jitter
    raw_h, raw_w = GetHeightWeight(subset)
    assert raw_h.values[100] == 175
    assert raw_w.values[100] == 86.36
    ScatterPlot('brfss_scatter1', raw_h, raw_w)

    # same sample with jitter, plotted with and without alpha
    jit_h, jit_w = GetHeightWeight(subset, hjitter=1.5, wjitter=1.1)
    assert int(jit_h.values[100]) == 173
    assert int(jit_w.values[100]) == 85
    ScatterPlot('brfss_scatter2', jit_h, jit_w)
    ScatterPlot('brfss_scatter3', jit_h, jit_w, alpha=0.1)

    # hexbin of all records
    all_h, all_w = GetHeightWeight(everyone, hjitter=1.3, wjitter=1.1)
    assert int(all_h.values[100]) == 171
    assert int(all_w.values[100]) == 55
    HexBin('brfss_scatter4', all_h, all_w)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: Week8_chap09sol_9.1_rkarna.py Proyecto: rkarna/ThinkStats2

def main():
    """Runs RunTests on seven samples of decreasing size.

    The first sample has len(live) rows; each later sample has half as
    many rows as the one before it (integer division).
    """
    thinkstats2.RandomSeed(18)
    live, _firsts, _others = first.MakeFrames()

    # floor-dividing by 2**k equals halving k times for a non-negative length
    total = len(live)
    sizes = [total // 2 ** k for k in range(7)]

    for size in sizes:
        RunTests(thinkstats2.SampleRows(live, size))

Ejemplo n.º 5

0

Mostrar archivo

Archivo: chap14ex_soln.py Proyecto: robJCollins/ThinkStats2

def main():
    """Runs the plotting steps and the intervention test in order.

    Calls thinkstats2.RandomSeed(17), builds the live / firsts / others
    frames with first.MakeFrames, then calls PlotAdultWeights,
    PlotPregLengths and TestIntervention.
    """
    thinkstats2.RandomSeed(17)

    live, firsts, others = first.MakeFrames()
    PlotAdultWeights(live)

    PlotPregLengths(live, firsts, others)

    # takes no arguments, so it does not receive the frames built above
    TestIntervention()

Ejemplo n.º 6

0

Mostrar archivo

def main(name, data_dir=''):
    """Runs the example, sampling, CDF and figure steps in order.

    name: string script name (not referenced in the body)
    data_dir: string directory (not referenced in the body)
    """
    thinkstats2.RandomSeed(17)

    # runs before the frames are loaded and takes no arguments
    MakeExample()
    live, firsts, others = first.MakeFrames()
    RandomFigure(live)
    TestSample(live)
    MakeCdf(live)
    MakeFigures(live, firsts, others)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: chap8ex.py Proyecto: seppomerimaa/ThinkStats2

def main():
    """Calls thinkstats2.RandomSeed(17) and then runs ex3 only.

    The other steps listed in the body are commented out and do not run.
    """
    thinkstats2.RandomSeed(17)

    # Disabled steps; only ex3() below is active.
    # Estimate1()
    # Estimate2()
    # Estimate3(m=1000)
    # SimulateSample()
    # ex1()
    # ex2()
    ex3()

Ejemplo n.º 8

0

Mostrar archivo

Archivo: scatter.py Proyecto: avinashalapati09/dsc530

def main(script):
    """Runs Correlations on BRFSS rows that have both 'htm3' and 'wtkg2'.

    script: string script name (not referenced in the body)

    returns: None
    """
    thinkstats2.RandomSeed(17)

    df = brfss.ReadBrfss(nrows=None)
    # drop rows where either column is missing
    df = df.dropna(subset=['htm3', 'wtkg2'])
    Correlations(df)

    # Disabled steps: these calls sat after an unconditional `return` and
    # therefore never executed. They are kept as comments so the runtime
    # behavior is unchanged; uncomment to re-enable.
    # MakeFigures(df)
    # BinnedPercentiles(df)

Ejemplo n.º 9

0

Mostrar archivo

def main():
    """Estimates birth weight, then plots on rows with complete data.

    EstimateBirthWeight receives the full `live` frame. The three
    plotting steps receive only the rows where both 'agepreg' and
    'totalwgt_lb' are present.
    """
    thinkstats2.RandomSeed(17)

    all_live, _, _ = first.MakeFrames()
    EstimateBirthWeight(all_live)

    # the plotting steps get only rows with both columns present
    complete = all_live.dropna(subset=['agepreg', 'totalwgt_lb'])

    PlotSamplingDistributions(complete)
    PlotFit(complete)
    PlotResiduals(complete)

Ejemplo n.º 10

0

Mostrar archivo

def main():
    """Runs the resample test, then RunTests on seven shrinking samples.

    The first sample has len(live) rows; each later sample has half as
    many rows as the previous one (integer division).
    """
    # calls RandomSeed with a fixed value, presumably so the random
    # samples repeat between runs
    thinkstats2.RandomSeed(23)
    live, firsts, others = first.MakeFrames()
    RunResampleTest(firsts, others)

    full_size = len(live)
    for halvings in range(7):
        # shifting right by k equals halving k times for a non-negative int
        size = full_size >> halvings
        RunTests(thinkstats2.SampleRows(live, size))

Ejemplo n.º 11

0

Mostrar archivo

def main():
    """Calls thinkstats2.RandomSeed(17) and then runs Estimate4.

    returns: None
    """
    thinkstats2.RandomSeed(17)
    Estimate4()

    # Disabled steps: this code sat after an unconditional `return` and
    # therefore never executed. It is kept as comments so the runtime
    # behavior is unchanged; uncomment to re-enable.
    #
    # for n in [10, 100, 1000]:
    #     stderr = SimulateSample(n=n)
    #     print(n, stderr)
    #
    # Estimate1()
    # Estimate2()

Ejemplo n.º 12

0

Mostrar archivo

Archivo: regression.py Proyecto: UnderPaidMathematician/ThinkStats2

def main(name, data_dir='.'):
    """Runs the logistic and linear regression steps in order.

    name: string script name (not referenced in the body)
    data_dir: string directory (not referenced in the body)
    """
    thinkstats2.RandomSeed(17)
    LogisticRegressionExample()

    live, firsts, others = first.MakeFrames()
    # boolean column: True where birthord equals 1
    live['isfirst'] = (live.birthord == 1)

    RunLogisticModels(live)

    RunSimpleRegression(live)
    RunModels(live)

    PredictBirthWeight(live)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: Week7_chap07sol_7.1_rkarna.py Proyecto: rkarna/ThinkStats2

def main(script):
    """Prints two correlations of 'agepreg' and 'totalwgt_lb' and plots them.

    Rows missing either column are dropped first. The scatter plot is
    saved as a jpg under the root name 'chap07scatter1'.

    script: string script name (not referenced in the body)
    """
    thinkstats2.RandomSeed(17)

    live, _firsts, _others = first.MakeFrames()
    complete = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    BinnedPercentiles(complete)

    mother_ages = complete.agepreg
    birth_weights = complete.totalwgt_lb

    corr = thinkstats2.Corr(mother_ages, birth_weights)
    print('thinkstats2 Corr', corr)
    spearman = thinkstats2.SpearmanCorr(mother_ages, birth_weights)
    print('thinkstats2 SpearmanCorr', spearman)

    ScatterPlot(mother_ages, birth_weights, alpha=0.1)
    thinkplot.Save(root='chap07scatter1', legend=False, formats=['jpg'])

Ejemplo n.º 14

0

Mostrar archivo

Archivo: brfss_corr.py Proyecto: wu12345/ThinkStats2

def main(name, nrows=None):
    """Tests the correlation between the 'htm3' and 'wtkg2' BRFSS columns.

    name: string script name (not referenced in the body)
    nrows: number of rows passed to brfss.ReadBrfss; converted with
           int() unless it is None, in which case None is forwarded

    ComputeCorrelations runs only when nrows is None.
    """
    thinkstats2.RandomSeed(17)

    if nrows is not None:
        nrows = int(nrows)

    df = brfss.ReadBrfss(nrows=nrows)

    # keep only rows where both columns are present
    columns = df[['htm3', 'wtkg2']].dropna()
    heights, weights = columns.htm3.values, columns.wtkg2.values

    TestCorrelation(heights, weights)
    # identity test is the idiomatic comparison with None, matching the
    # `is not None` check above
    if nrows is None:
        ComputeCorrelations(heights, weights)

Ejemplo n.º 15

0

Mostrar archivo

def main():
    """Runs a series of hypothesis tests and prints their results.

    In order: the coin test, a pregnancy-length comparison via RunTests,
    a permutation test on birth weights, a correlation permutation test,
    the dice test, a chi-squared pregnancy-length test, a false negative
    rate computation, and ReplicateTests.
    """
    thinkstats2.RandomSeed(17)

    # run the coin test
    ct = CoinTest((140, 110))
    pvalue = ct.PValue()
    print('coin test p-value', pvalue)

    # compare pregnancy lengths
    print('\nprglngth')
    live, firsts, others = first.MakeFrames()
    data = firsts.prglngth.values, others.prglngth.values
    RunTests(data)

    # compare birth weights
    print('\nbirth weight')
    # missing weights are dropped from each group separately
    data = (firsts.totalwgt_lb.dropna().values,
            others.totalwgt_lb.dropna().values)
    ht = DiffMeansPermute(data)
    p_value = ht.PValue(iters=1000)
    print('means permute two-sided')
    PrintTest(p_value, ht)

    # test correlation
    # only rows with both columns present are used
    live2 = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    data = live2.agepreg.values, live2.totalwgt_lb.values
    ht = CorrelationPermute(data)
    p_value = ht.PValue()
    print('\nage weight correlation')
    print('n=', len(live2))
    PrintTest(p_value, ht)

    # run the dice test
    RunDiceTest()

    # compare pregnancy lengths (chi-squared)
    data = firsts.prglngth.values, others.prglngth.values
    ht = PregLengthTest(data)
    p_value = ht.PValue()
    print('\npregnancy length chi-squared')
    PrintTest(p_value, ht)

    # compute the false negative rate for difference in pregnancy length
    # (rebuilds the same pair of arrays as the chi-squared step above)
    data = firsts.prglngth.values, others.prglngth.values
    neg_rate = FalseNegRate(data)
    print('false neg rate', neg_rate)

    # run the tests with new nsfg data
    ReplicateTests()

Ejemplo n.º 16

0

Mostrar archivo

def main():
    """Makes the CLT plots, prints the gorilla example, and runs two tests.

    The gorilla example builds Normal(90, 7.5**2), divides the result of
    its Sum(9) by 9, and prints that distribution's sigma and its 5th
    and 95th percentiles.
    """
    thinkstats2.RandomSeed(17)

    MakeCltPlots()

    print('Gorilla example')
    population = Normal(90, 7.5**2)
    print(population)

    sample_mean = population.Sum(9) / 9
    print(sample_mean.sigma)

    low = sample_mean.Percentile(5)
    high = sample_mean.Percentile(95)
    print(low, high)

    live, firsts, others = first.MakeFrames()
    TestCorrelation(live)
    PlotPregLengths(live, firsts, others)

    TestChiSquared()

Ejemplo n.º 17

0

Mostrar archivo

def _RunAndPlotTest(test_class, data, root, title, xlabel):
    """Runs one hypothesis test, prints its p-value and saves its CDF plot.

    test_class: callable that builds the test object from data
    data: pair of arrays handed to test_class
    root: string root name of the saved figure
    title: string plot title
    xlabel: string label for the x-axis
    """
    ht = test_class(data)
    p_value = ht.PValue(iters=1000)
    print('p-value =', p_value)

    ht.PlotCdf()
    thinkplot.Save(root=root,
                   title=title,
                   xlabel=xlabel,
                   ylabel='CDF',
                   legend=False)


def main():
    """Runs three tests comparing pregnancy lengths of firsts and others.

    Prints the result of thinkstats2.MeanVar for live.prglngth, then
    runs each test with iters=1000 and saves its CDF plot as
    'hypothesis1', 'hypothesis2' and 'hypothesis3'.
    """
    thinkstats2.RandomSeed(17)

    # get the data
    live, firsts, others = first.MakeFrames()
    mean_var = thinkstats2.MeanVar(live.prglngth)
    print('(Mean, Var) of prglength for live births', mean_var)
    data = firsts.prglngth.values, others.prglngth.values

    # test the difference in means
    _RunAndPlotTest(DiffMeansPermute, data,
                    root='hypothesis1',
                    title='Permutation test',
                    xlabel='difference in means (weeks)')

    # test the difference in std
    _RunAndPlotTest(DiffStdPermute, data,
                    root='hypothesis2',
                    title='Permutation test',
                    xlabel='difference in std (weeks)')

    # test the difference in means by resampling
    # NOTE(review): this section is labelled as a resampling test of the
    # difference in means, but it runs DiffStdPermute again, exactly as
    # the section above does. This looks like a copy-paste slip; confirm
    # which test class was intended before changing it.
    _RunAndPlotTest(DiffStdPermute, data,
                    root='hypothesis3',
                    title='Resampling test',
                    xlabel='difference in means (weeks)')

Ejemplo n.º 18

0

Mostrar archivo

Archivo: timeseries.py Proyecto: crskbel-ca/PythonBayesianStatistics

def main(name):
    """Runs the time series analysis and saves two prediction figures.

    After the per-quality plots and models, the 'high' series is plotted
    with predictions ('timeseries4') and the 'medium' series with
    intervals and predictions ('timeseries5').

    name: string script name; the value passed in is never read
    """
    def save_predictions(root, limits):
        # both prediction figures share every option except root and xlim
        thinkplot.Save(root=root,
                       title='predictions',
                       xlabel='years',
                       xlim=limits,
                       ylabel='price per gram ($)',
                       formats=FORMATS)

    thinkstats2.RandomSeed(18)
    transactions = ReadData()

    dailies = GroupByQualityAndDay(transactions)
    PlotDailies(dailies)
    RunModels(dailies)
    PrintSerialCorrelations(dailies)
    MakeAcfPlot(dailies)

    # --- 'high' series ---
    high_label = 'high'
    high = dailies[high_label]

    PlotLinearModel(high, high_label)
    PlotRollingMean(high, high_label)
    PlotFilled(high, high_label)

    years = np.linspace(0, 5, 101)
    thinkplot.Scatter(high.years, high.ppg, alpha=0.1, label=high_label)
    PlotPredictions(high, years)
    # x-axis limits extend 0.1 beyond the first and last year
    limits = years[0] - 0.1, years[-1] + 0.1
    save_predictions('timeseries4', limits)

    # --- 'medium' series ---
    medium_label = 'medium'
    medium = dailies[medium_label]

    thinkplot.Scatter(medium.years, medium.ppg, alpha=0.1, label=medium_label)
    PlotIntervals(medium, years)
    PlotPredictions(medium, years)
    limits = years[0] - 0.1, years[-1] + 0.1
    save_predictions('timeseries5', limits)

Ejemplo n.º 19

0

Mostrar archivo

Archivo: analytic.py Proyecto: AxleMaxGit/python-data-science-projects

def main():
    """Makes the analytic CDF figures and the birth-weight normal plots.

    Calls thinkstats2.RandomSeed(18), runs the figure functions in
    order, then reads the pregnancy data and passes its birth weights
    to MakeNormalModel and MakeNormalPlot.
    """
    thinkstats2.RandomSeed(18)
    MakeExampleNormalPlot()

    # make the analytic CDFs
    MakeExpoCdf()
    MakeBabyBoom()

    MakeParetoCdf()
    MakeParetoCdf2()
    MakeNormalCdf()

    # test the distribution of birth weights for normality
    preg = nsfg.ReadFemPreg()
    # rows whose prglngth is at least 37
    full_term = preg[preg.prglngth >= 37]

    # missing weights are dropped from each series
    weights = preg.totalwgt_lb.dropna()
    term_weights = full_term.totalwgt_lb.dropna()

    MakeNormalModel(weights)
    MakeNormalPlot(weights, term_weights)

Ejemplo n.º 20

0

Mostrar archivo

def main():
    """Plots the survival figures and saves 'survival4' and 'survival5'.

    Pregnancy and marriage plots come first. The respondent frames from
    the 1995, 2002 and 2010 readers are then resampled by decade, once
    without and once with predictions.
    """
    def save_unmarried_figure(root):
        # both saved figures share every option except the root name
        thinkplot.Save(root=root,
                       xlabel='age (years)',
                       ylabel='prob unmarried',
                       xlim=[13, 45],
                       ylim=[0, 1],
                       formats=FORMATS)

    thinkstats2.RandomSeed(17)

    pregnancies = nsfg.ReadFemPreg()
    preg_sf = PlotPregnancyData(pregnancies)

    # make the plots based on Cycle 6
    cycle6 = ReadFemResp2002()
    marriage_sf = PlotMarriageData(cycle6)
    ResampleSurvival(cycle6)
    PlotRemainingLifetime(preg_sf, marriage_sf)

    # read Cycles 5 and 7
    cycle5 = ReadFemResp1995()
    cycle7 = ReadFemResp2010()
    cycles = [cycle5, cycle6, cycle7]

    # resampled survival functions by decade
    PlotResampledByDecade(cycles)
    save_unmarried_figure('survival4')

    # the same plot with predictions
    PlotResampledByDecade(cycles, predict_flag=True, omit=[5])
    save_unmarried_figure('survival5')

Ejemplo n.º 21

0

Mostrar archivo

def main():
    """Plots resampled survival functions by decade and saves 'marriage1'.

    Uses only the frame returned by ReadFemResp2002. The figure is saved
    as a pdf.

    returns: None
    """
    thinkstats2.RandomSeed(17)

    # make the plots based on Cycle 6
    resp6 = ReadFemResp2002()
    resps = [resp6]

    sf_map = ResampleSurvivalByDecade(resps)
    # The result of the predict_flag=True call was assigned but never
    # used. The call itself is kept in case it has side effects (for
    # example, on random state) that later steps rely on.
    ResampleSurvivalByDecade(resps, predict_flag=True)
    PlotSurvivalFunctions(sf_map)
    thinkplot.Save(root='marriage1', formats=['pdf'])

    # Disabled steps: each pair below sat after an unconditional `return`
    # and therefore never executed. They are kept as comments so the
    # runtime behavior is unchanged; uncomment one pair to run it.
    #
    # resp8 = ReadFemResp2013()
    # Validate2013(resp8)
    #
    # resp7 = ReadFemResp2010()
    # Validate2010(resp7)
    #
    # resp6 = ReadFemResp2002()
    # Validate2002(resp6)
    #
    # resp5 = ReadFemResp1995()
    # Validate1995(resp5)
    #
    # resp4 = ReadFemResp1988()
    # Validate1988(resp4)
    #
    # resp3 = ReadFemResp1982()
    # Validate1982(resp3)

Ejemplo n.º 22

0

Mostrar archivo

Archivo: density.py Proyecto: kangxi5200/Think-Stats2

def main():
    """Calls thinkstats2.RandomSeed(17), then MakePdfExample and
    ComputeSkewnesses, in that order.
    """
    thinkstats2.RandomSeed(17)

    MakePdfExample()
    ComputeSkewnesses()

Ejemplo n.º 23

0

Mostrar archivo

def main():
    """Runs the flight-delay analysis from data loading to regression plots.

    Reads the flight, airline and airport tables and then runs each
    analysis step in order. The "Prompt" comments quote the assignment
    text that each step answers; they were previously written as bare
    string literals in the middle of the function, which are no-op
    expression statements and not comments.
    """
    thinkstats2.RandomSeed(17)

    flights = ReadFlightData()
    # print(flights.head())
    # print(flights.DESTINATION_AIRPORT.to_string(index=False))

    airlines = ReadAirlineData()
    # print(airlines.head())

    airports = ReadAirportData()
    # print(airports.head())

    # Prompt (Chapter 1): A minimum of 5 variables in your dataset used
    # during your analysis (for help with selecting, the author made his
    # selection on page 6 of your book). Consider what you think could
    # have an impact on your question - remember this is never perfect,
    # so don't be worried if you miss one.
    # Describe what the 5 variables mean in the dataset.
    #
    # DAY_OF_WEEK - Integer 1 - 7 corresponding to the day of the week.
    #     1 is Monday and 7 is Sunday.
    # AIRLINE - Letter code corresponding to the airline for the flight.
    # ORIGIN_AIRPORT - Airport code corresponding to the flight's origin
    #     airport.
    # DESTINATION_AIRPORT - Airport code corresponding to the flight's
    #     destination airport.
    # DEPARTURE_DELAY - Integer value corresponding to the departure
    #     delay for the flight. Computed from SCHEDULED_DEPARTURE and
    #     DEPARTURE_TIME.
    # ARRIVAL_DELAY - Integer value corresponding to the arrival delay
    #     for the flight. Computed from SCHEDULED_ARRIVAL and
    #     ARRIVAL_TIME.

    # Prompt (Chapter 2): Include a histogram of each of the 5 variables -
    # in your summary and analysis, identify any outliers and explain the
    # reasoning for them being outliers and how you believe they should
    # be handled. Include the other descriptive characteristics about the
    # variables: Mean, Mode, Spread, and Tails.
    createHistograms(flights, airlines, airports)

    alaska = flights[flights.AIRLINE == 'AS']
    # print(alaska.head())
    notAlaska = flights[flights.AIRLINE != 'AS']
    # print(notAlaska.head())

    # Prompt (Chapter 3): Using pg. 29 of your text as an example, compare
    # two scenarios in your data using a PMF. Reminder, this isn't
    # comparing two variables against each other - it is the same
    # variable, but a different scenario. Almost like a filter. The
    # example in the book is first babies compared to all other babies,
    # it is still the same variable, but breaking the data out based on
    # criteria we are exploring.
    compareAlaskaAirlinesPmf(alaska, notAlaska)
    compareDetroitAirport(flights)
    compareDay4(flights)

    # Prompt (Chapter 4): Create 1 CDF with one of your variables, using
    # page 41-44 as your guide, what does this tell you about your
    # variable and how does it address the question you are trying to
    # answer.
    compareAlaskaAirlinesCdf(alaska, notAlaska)

    arrivalDelays = flights.ARRIVAL_DELAY.dropna()

    # Prompt (Chapter 5): Plot 1 analytical distribution and provide your
    # analysis on how it applies to the dataset you have chosen.
    MakeNormalModel(arrivalDelays)
    MakeNormalPlot(arrivalDelays)

    # Prompt (Chapter 7): Create two scatter plots comparing two variables
    # and provide your analysis on correlation and causation. Remember,
    # covariance, Pearson's correlation, and Non-Linear Relationships
    # should also be considered during your analysis.
    MakeAirlineArrivalDelayScatterPlots(flights)
    MakeArrivalDepartureDelayScatterPlots(flights)
    ComputeArrivalDepartureDelayCorrelations(flights)
    ComputeAirlineArrivalDelayCorrelations(flights)

    # Prompt (Chapter 9): Conduct a test on your hypothesis using one of
    # the methods covered in Chapter 9.
    #
    # Remove data with missing arrival delay.
    # It seems most of the rows in the set with missing arrival delay are
    # also missing values for other attributes.
    # I do not feel this will have an impact for this analysis.
    hypothesisTestData = (alaska.ARRIVAL_DELAY.dropna().values,
                          notAlaska.ARRIVAL_DELAY.dropna().values)
    RunAlaskaTests(hypothesisTestData)

    # Prompt (Chapter 10 & 11): For this project, conduct a regression
    # analysis on either one dependent and one explanatory variable, or
    # multiple explanatory variables.
    PlotAirlineArrivalDelayFit(flights)
    PlotArrivalDepartureDelayFit(flights)

Ejemplo n.º 24

0

Mostrar archivo

Archivo: DSC530_Paulovici_Exercise_8_2.py Proyecto: kevinpau/Bellevue_University_DSC_530

    live2 = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    data = live2.agepreg.values, live2.totalwgt_lb.values
    ht = hypothesis.CorrelationPermute(data)
    p3 = ht.PValue(iters=iters)

    # compare pregnancy lengths (chi-squared)
    data = firsts.prglngth.values, others.prglngth.values
    ht = hypothesis.PregLengthTest(data)
    p4 = ht.PValue(iters=iters)

    print("{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}".format(n, p1, p2, p3, p4))


#%%
# call RandomSeed with a fixed value before any sampling
thinkstats2.RandomSeed(18)

# build the live / firsts / others frames
live, firsts, others = first.MakeFrames()
# Pass the `firsts` frame unpacked above. The previous code passed `first`,
# which is the module that provides MakeFrames, not a frame.
RunSampleTest(firsts, others)

#%%
# run the tests on samples that halve in size each round
n = len(live)
print("nval\t Test1\t  Test2\t  Test3\t  Test4\t")
for i in range(7):
    sample = thinkstats2.SampleRows(live, n)
    RunTests(sample)
    n //= 2

#%% [markdown]

Ejemplo n.º 25

0

Mostrar archivo

Archivo: populations.py Proyecto: kangxi5200/Think-Stats2

def main():
    """Calls thinkstats2.RandomSeed(17) and then MakeFigures."""
    thinkstats2.RandomSeed(17)
    MakeFigures()