Python Scatter Exemples, thinkplot.Scatter Python Exemples

Exemple #1

0

Afficher le fichier

def PlotQuadraticModel(daily, name):
    """Fit the quadratic model to `daily` and save three figures.

    The figures are the fitted values ('timeseries11'), the residual
    percentiles ('timeseries12') and the predictions ('timeseries13').

    daily: object exposing `years` and `ppg` (presumably a DataFrame of
        daily prices)
    name: string used as the legend label
    """
    price_label = 'price per gram ($)'

    model, results = RunQuadraticModel(daily)
    regression.SummarizeResults(results)

    # figure 1: observed data plus the fitted curve
    timeseries.PlotFittedValues(model, results, label=name)
    thinkplot.Save(root='timeseries11', title='fitted values',
                   xlabel='years', xlim=[-0.1, 3.8], ylabel=price_label)

    # figure 2: percentiles of the residuals
    timeseries.PlotResidualPercentiles(model, results)
    thinkplot.Save(root='timeseries12', title='residuals',
                   xlabel='years', ylabel=price_label)

    # figure 3: predictions on a grid of 101 points from 0 to 5,
    # with a 0.1 margin on each side of the x axis
    years = np.linspace(0, 5, 101)
    first, last = years[0], years[-1]
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)
    thinkplot.Save(root='timeseries13', title='predictions',
                   xlabel='years', xlim=[first - 0.1, last + 0.1],
                   ylabel=price_label)

Exemple #2

0

Afficher le fichier

Fichier : ch10.py Projet : smithb16/ThinkStats2

def ScatterFit(xs, ys, **options):
    """Show a scatter plot of the data with its least-squares line on top.

    xs: sequence of x values
    ys: sequence of y values
    options: keyword arguments forwarded to thinkplot.Show
    """
    intercept, gradient = LeastSquares(xs, ys)
    line_xs, line_ys = FitLine(xs, intercept, gradient)

    thinkplot.Scatter(xs, ys, color='blue', alpha=0.1, s=10)

    # the line is drawn twice: a wider white stroke first, then a
    # narrower red stroke over it
    for stroke_color, stroke_width in (('white', 3), ('red', 2)):
        thinkplot.Plot(line_xs, line_ys,
                       color=stroke_color, linewidth=stroke_width)

    thinkplot.Show(legend=False, **options)

Exemple #3

0

Afficher le fichier

Fichier : Week7_chap07sol_7.1_rkarna.py Projet : rkarna/ThinkStats2

def ScatterPlot(ages, weights, alpha=1.0):
    """Draw a scatter plot of weights against ages and configure the axes.

    ages: sequence of x values
    weights: sequence of y values
    alpha: float opacity passed to thinkplot.Scatter
    """
    axis_options = dict(xlabel='age (years)',
                        ylabel='weight (lbs)',
                        xlim=[10, 45],
                        ylim=[0, 15],
                        legend=False)

    thinkplot.Scatter(ages, weights, alpha=alpha)
    thinkplot.Config(**axis_options)

Exemple #4

0

Afficher le fichier

def PlotArrivalDepartureDelayFit(flights):
    """Plot sampled delays with a fitted line, then summarize an OLS fit.

    A sample of 1000 rows is drawn from `flights`; departure delay is
    plotted against arrival delay, the least-squares line is drawn over the
    points, the figure is saved, and an OLS regression of DEPARTURE_DELAY on
    ARRIVAL_DELAY over the same sample is summarized.

    flights: DataFrame with ARRIVAL_DELAY and DEPARTURE_DELAY columns
    """
    rows = thinkstats2.SampleRows(flights, 1000)
    xs = rows.ARRIVAL_DELAY
    ys = rows.DEPARTURE_DELAY

    intercept, gradient = thinkstats2.LeastSquares(xs, ys)
    line_xs, line_ys = thinkstats2.FitLine(xs, intercept, gradient)

    thinkplot.Scatter(xs, ys, color='gray', alpha=0.1)
    # wide white stroke first, narrower blue stroke over it
    for stroke_color, stroke_width in (('white', 3), ('blue', 2)):
        thinkplot.Plot(line_xs, line_ys,
                       color=stroke_color, linewidth=stroke_width)
    thinkplot.Save(root='ArrivalDepartureDelayFit_linear1',
                   xlabel='arrival delay (min)',
                   ylabel='departure delay (min)',
                   legend=False)

    results = smf.ols('DEPARTURE_DELAY ~ ARRIVAL_DELAY', data=rows).fit()
    regression.SummarizeResults(results)

Exemple #5

0

Afficher le fichier

def MakeArrivalDepartureDelayScatterPlots(flights):
    """Save a scatter plot of departure delay against arrival delay.

    A sample of 10000 rows is drawn from `flights`, the two delay series are
    obtained from GetArrivalDepartureDelay with jitter arguments, and the
    figure is saved under the root 'ArrivalDepartureDelayScatterplot'.

    flights: passed to thinkstats2.SampleRows (presumably a DataFrame of
        flight records -- TODO confirm)
    """
    sample = thinkstats2.SampleRows(flights, 10000)

    # simple scatter plot
    # NOTE(review): a two-column layout is requested, but the first
    # (un-jittered) plot below is commented out, so only one plot is drawn.
    thinkplot.PrePlot(cols=2)
    #    departureDelays, arrivalDelays = GetArrivalDepartureDelay(sample)
    #    airports = sample.AIRLINE
    #   arrivalDelays = sample.ARRIVAL_DELAY
    #    ScatterPlot(airports, arrivalDelays)

    # scatter plot with jitter
    # NOTE(review): the meaning of hjitter/wjitter and the order of the
    # returned pair are defined by GetArrivalDepartureDelay -- confirm there.
    #    thinkplot.SubPlot(2)
    departureDelays, arrivalDelays = GetArrivalDepartureDelay(sample,
                                                              hjitter=1.3,
                                                              wjitter=0.5)

    # arrival delay on the x axis, departure delay on the y axis
    thinkplot.Scatter(arrivalDelays, departureDelays, alpha=1)
    thinkplot.Config(
        xlabel='arrival delay (min)',
        ylabel='departure delay (min)',
        #                     axis=[-20, 20, 20, 200],
        legend=False)

    thinkplot.Save(root='ArrivalDepartureDelayScatterplot')

Exemple #6

0

Afficher le fichier

def PlotEwmaPredictions(daily, name):
    """Extend the EWMA of daily prices one year ahead and plot it.

    The slope is estimated as the EWMA (span 180) of the day-to-day price
    differences. The last EWMA value and the last slope define a straight
    line that fills in the 365 days appended after the final observed date.

    daily: DataFrame of daily prices with a `ppg` column and a date index
    name: string used as the legend label for the observed data

    Assumes timeseries.FillMissing returns a frame that already contains an
    `ewma` column (it is read below but not created here).
    """
    # use EWMA to estimate slopes; Series.ewm replaces the removed
    # top-level pandas.ewma function and uses the same defaults
    filled = timeseries.FillMissing(daily)
    filled['slope'] = filled.ppg.diff().ewm(span=180).mean()

    # extract the last inter and slope; .iloc makes the positional lookup
    # explicit because the index holds dates, not integers
    start = filled.index[-1]
    inter = filled['ewma'].iloc[-1]
    slope = filled['slope'].iloc[-1]

    # reindex the DataFrame, adding a year to the end
    dates = pandas.date_range(filled.index.min(),
                              filled.index.max() + np.timedelta64(365, 'D'))
    predicted = filled.reindex(dates)

    # generate predicted values and add them to the end
    predicted['date'] = predicted.index
    one_day = np.timedelta64(1, 'D')
    predicted['days'] = (predicted.date - start) / one_day
    predict = inter + slope * predicted.days
    # assign back instead of a chained in-place fillna, which may act on a
    # temporary copy and leave the frame unchanged
    predicted['ewma'] = predicted['ewma'].fillna(predict)

    # plot the actual values and predictions
    thinkplot.Scatter(daily.ppg, alpha=0.1, label=name)
    thinkplot.Plot(predicted['ewma'])
    thinkplot.Save()

Exemple #7

0

Afficher le fichier

Fichier : 10_HO1.py Projet : fullern1/previouscode

def PlotScatter(age, wgt, xmin, xmax, ymin, ymax):
    """Show a scatter plot of `wgt` against `age` within the given limits.

    age: sequence of x values
    wgt: sequence of y values
    xmin, xmax: x-axis limits
    ymin, ymax: y-axis limits
    """
    x_range = [xmin, xmax]
    y_range = [ymin, ymax]

    thinkplot.Scatter(age, wgt, alpha=1.0)
    thinkplot.Config(xlabel='Age (Years)',
                     ylabel='Birth Weight (lbs)',
                     xlim=x_range,
                     ylim=y_range,
                     legend=False)
    thinkplot.Show()

Exemple #8

0

Afficher le fichier

Fichier : timeseries.py Projet : husseingb/ThinkStats2

def PlotFilled(daily, name):
    """Save a figure of the filled prices with their EWMA drawn on top.

    daily: DataFrame of daily prices
    name: string used as the legend label for the scattered prices
    """
    filled = FillMissing(daily, span=30)
    prices = filled.ppg
    smoothed = filled.ewma

    thinkplot.Scatter(prices, s=15, alpha=0.3, label=name)
    thinkplot.Plot(smoothed, label='EWMA', alpha=0.4)

    # tilt the tick labels on the x axis
    pyplot.xticks(rotation=30)
    thinkplot.Save(root='timeseries8', ylabel='price per gram ($)')

Exemple #9

0

Afficher le fichier

Fichier : timeseries.py Projet : crskbel-ca/PythonBayesianStatistics

def PlotFittedValues(model, results, label=''):
    """Draw the observed values as points and the fitted values as a line.

    model: StatsModel model object
    results: StatsModel results object
    label: string legend label for the observed data
    """
    # column 1 of the explanatory matrix supplies the x values
    xs = model.exog[:, 1]
    observed = model.endog
    fitted = results.fittedvalues

    thinkplot.Scatter(xs, observed, s=15, label=label)
    thinkplot.Plot(xs, fitted, label='model')

Exemple #10

0

Afficher le fichier

def PlotSimplePrediction(results, years):
    """Show the observed prices together with a simple prediction.

    results: passed through to GenerateSimplePrediction
    years: sequence of x values to predict for; its first and last elements
        also set the x-axis limits

    NOTE(review): `daily` and `name` are not parameters of this function;
    they must exist as module-level (global) names when it is called.
    """
    predict = GenerateSimplePrediction(results, years)

    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.2, label=name)
    thinkplot.Plot(years, predict, color='#ff7f00')
    # leave a 0.1 margin on both sides of the predicted range
    xlim = years[0] - 0.1, years[-1] + 0.1
    thinkplot.Show(title='Predictions',
                   xlabel='Years',
                   xlim=xlim,
                   ylabel='Price per gram ($)',
                   loc='upper right')

Exemple #11

0

Afficher le fichier

Fichier : timeseries.py Projet : crskbel-ca/PythonBayesianStatistics

def main(name):
    """Run the time-series analyses and save the resulting figures.

    name: overwritten below before it is ever read, so the value passed in
        has no effect on this function
    """
    # fixed seed so repeated runs draw the same random numbers
    thinkstats2.RandomSeed(18)
    transactions = ReadData()

    dailies = GroupByQualityAndDay(transactions)
    PlotDailies(dailies)
    RunModels(dailies)
    PrintSerialCorrelations(dailies)
    MakeAcfPlot(dailies)

    # the 'high' entry of dailies is used for the next group of figures
    name = 'high'
    daily = dailies[name]

    PlotLinearModel(daily, name)
    PlotRollingMean(daily, name)
    PlotFilled(daily, name)

    # grid of 101 points from 0 to 5 used for the predictions
    years = np.linspace(0, 5, 101)
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    PlotPredictions(daily, years)
    # leave a 0.1 margin on both sides of the grid
    xlim = years[0] - 0.1, years[-1] + 0.1
    thinkplot.Save(root='timeseries4',
                   title='predictions',
                   xlabel='years',
                   xlim=xlim,
                   ylabel='price per gram ($)',
                   formats=FORMATS)

    # same figure for the 'medium' entry, with intervals added
    name = 'medium'
    daily = dailies[name]

    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    PlotIntervals(daily, years)
    PlotPredictions(daily, years)
    xlim = years[0] - 0.1, years[-1] + 0.1
    thinkplot.Save(root='timeseries5',
                   title='predictions',
                   xlabel='years',
                   xlim=xlim,
                   ylabel='price per gram ($)',
                   formats=FORMATS)

Exemple #12

0

Afficher le fichier

Fichier : timeseries.py Projet : crskbel-ca/PythonBayesianStatistics

def PlotRollingMean(daily, name):
    """Plots rolling mean and EWMA.

    Saves a two-column figure: the reindexed prices with their rolling mean
    on the left, and the same prices with their EWMA on the right.

    daily: DataFrame of daily prices
    name: string used as the legend label for the scattered prices
    """
    # reindex over every date between the first and last observation
    dates = pandas.date_range(daily.index.min(), daily.index.max())
    reindexed = daily.reindex(dates)

    thinkplot.PrePlot(cols=2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    # Series.rolling replaces the removed pandas.rolling_mean function;
    # the window is 30 rows
    roll_mean = reindexed.ppg.rolling(30).mean()
    thinkplot.Plot(roll_mean, label='rolling mean')
    pyplot.xticks(rotation=30)
    thinkplot.Config(ylabel='price per gram ($)')

    thinkplot.SubPlot(2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    # Series.ewm replaces the removed pandas.ewma function
    ewma = reindexed.ppg.ewm(span=30).mean()
    thinkplot.Plot(ewma, label='EWMA')
    pyplot.xticks(rotation=30)
    thinkplot.Save(root='timeseries10', formats=FORMATS)

Exemple #13

0

Afficher le fichier

Fichier : scatter.py Projet : avinashalapati09/dsc530

def ScatterPlot(heights, weights, alpha=1.0):
    """Draw a scatter plot of weights against heights and configure the axes.

    Nothing is saved or shown here; the caller does that.

    heights: sequence of float
    weights: sequence of float
    alpha: float
    """
    axis_options = {
        'xlabel': 'height (cm)',
        'ylabel': 'weight (kg)',
        'axis': [140, 210, 20, 200],
        'legend': False,
    }

    thinkplot.Scatter(heights, weights, alpha=alpha)
    thinkplot.Config(**axis_options)

Exemple #14

0

Afficher le fichier

def PlotFilled(daily, name):
    """Plot the EWMA and filled data.

    daily: DataFrame of daily prices
    name: string used as the legend label for the scattered prices
    """
    filled = FillMissing(daily, span=30)
    thinkplot.Scatter(filled.ppg, s=15, alpha=0.2, label=name)
    thinkplot.Plot(filled.ewma, label='EWMA', color='#ff7f00')
    # tilt the tick labels on the x axis
    plt.xticks(rotation=30)
    # the axis text is set with `ylabel`; the original passed `label`,
    # which is not an axis option, so the y axis was left unlabelled
    thinkplot.Config(ylabel='Price per gram ($)')
    thinkplot.Show()

Exemple #15

0

Afficher le fichier

Fichier : chap07soln.py Projet : UnderPaidMathematician/ThinkStats2

def ScatterPlot(ages, weights, alpha=1.0):
    """Draw a scatter plot of weights against ages and configure the axes.

    Nothing is saved or shown here; the caller does that.

    ages: sequence of float
    weights: sequence of float
    alpha: float
    """
    axis_options = {
        'xlabel': 'age (years)',
        'ylabel': 'weight (lbs)',
        'xlim': [10, 45],
        'ylim': [0, 15],
        'legend': False,
    }

    thinkplot.Scatter(ages, weights, alpha=alpha)
    thinkplot.Config(**axis_options)

Exemple #16

0

Afficher le fichier

Fichier : brfss_scatter.py Projet : Ehsan1981/ThinkStats2

def ScatterPlot(root, heights, weights, alpha=1.0):
    """Scatter weights against heights and save the figure under `root`.

    root: string filename root
    heights: sequence of float
    weights: sequence of float
    alpha: float
    """
    save_options = {
        'xlabel': 'Height (cm)',
        'ylabel': 'Weight (kg)',
        'axis': [140, 210, 20, 200],
        'legend': False,
    }

    thinkplot.Scatter(heights, weights, alpha=alpha)
    thinkplot.Save(root=root, **save_options)

Exemple #17

0

Afficher le fichier

def PlotRollingMean(daily, name):
    """Show the daily prices together with their 30-row rolling mean.

    daily: DataFrame of daily prices
    name: string
    """
    # reindex over every date between the first and last observation
    full_range = pd.date_range(daily.index.min(), daily.index.max())
    prices = daily.reindex(full_range).ppg

    thinkplot.Scatter(prices, s=15, alpha=0.2, label=name)
    thinkplot.Plot(prices.rolling(30).mean(),
                   label='rolling mean',
                   color='#ff7f00')

    plt.xticks(rotation=30)
    thinkplot.Config(ylabel='price per gram ($)')
    thinkplot.Show()

Exemple #18

0

Afficher le fichier

Fichier : recommendations.py Projet : qrsforever/workspace

def sim_pearson(perfs, p1, p2, show=False):
    """Return the Pearson correlation coefficient between two raters.

    The coefficient is cov(X, Y) / (sigma_X * sigma_Y), computed over the
    items rated by both p1 and p2, with divide-by-n variance and covariance.

    perfs: mapping from person to a mapping of item -> numeric rating
    p1, p2: keys of perfs identifying the two people to compare
    show: if true, also print this value next to correlation.Corr's result
        and display a scatter plot of the shared ratings (default False)

    returns: the coefficient, or 0 when there are no shared items or when
        either set of shared ratings has zero variance
    """
    # items rated by both people, in p1's iteration order
    shared_items = [item for item in perfs[p1] if item in perfs[p2]]
    n = len(shared_items)
    if n == 0:
        return 0

    # ratings that p1 and p2 have in common
    data_p1 = [perfs[p1][it] for it in shared_items]
    data_p2 = [perfs[p2][it] for it in shared_items]

    # mean rating of each person
    mu_p1 = sum(data_p1) / n
    mu_p2 = sum(data_p2) / n

    # variance of each person's ratings
    var_p1 = sum((it - mu_p1) ** 2 for it in data_p1) / n
    var_p2 = sum((it - mu_p2) ** 2 for it in data_p2) / n

    # a constant series has no defined correlation; report 0 rather than
    # dividing by zero
    if var_p1 == 0 or var_p2 == 0:
        return 0

    # covariance
    cov = sum((x - mu_p1) * (y - mu_p2)
              for x, y in zip(data_p1, data_p2)) / n

    # Pearson correlation coefficient
    r = cov / sqrt(var_p1 * var_p2)

    # optional cross-check against the thinkstats implementation
    if show:
        rr = correlation.Corr(data_p1, data_p2)
        print(r, rr)
        thinkplot.Clf()
        thinkplot.Scatter(data_p1, data_p2)
        thinkplot.Show()

    return r

Exemple #19

0

Afficher le fichier

Fichier : correlate1.py Projet : wu12345/ThinkStats2

def main(name, data_dir='.'):
    """Save a scatter plot of the data and print correlation statistics.

    Prints the Pearson and Spearman correlations, ten results of
    SimulateNull, and the result of PValue with 1000 as its third argument.

    name: not used in this function
    data_dir: directory passed to ReadData (default '.')
    """
    xs, ys = ReadData(data_dir)

    thinkplot.Scatter(xs, ys, alpha=0.05)
    thinkplot.Save(root='correlate1',
                   xlabel='Age (years)',
                   ylabel='Birth weight (oz)',
                   axis=[9, 45, 0, 250])

    # single-argument print() calls produce the same text on Python 2 and
    # Python 3; the original print statements only parse on Python 2
    print('Pearson {}'.format(thinkstats2.Corr(xs, ys)))
    print('Spearman {}'.format(thinkstats2.SpearmanCorr(xs, ys)))

    # SimulateNull receives fresh list copies on every call
    for _ in range(10):
        print(SimulateNull(list(xs), list(ys)))

    print(PValue(xs, ys, 1000))

Exemple #20

0

Afficher le fichier

Fichier : Assignment12.1EEdmunds.py Projet : tripleee19/EDA-of-Russian-Crime-Data

def scatter(x):
    """Show total crimes plotted against column `x` of the global `df`.

    For x == 'month' only the plot is shown. For any other column the plot
    is followed by printed Spearman correlation and covariance figures.

    x: name of a column in the module-level DataFrame `df`
    """
    total = df.Total_crimes
    thinkplot.Scatter(df[x], total, alpha=.5)

    # the time axis gets fixed labels and no statistics
    if x == 'month':
        thinkplot.Show(title="Total Crimes vs Time",
                       xlabel="Year",
                       ylabel="Total Crimes")
        return

    thinkplot.Show(title="Total Crimes vs " + x + " Crimes",
                   xlabel=x + " Crimes",
                   ylabel="Total Crimes")

    print(x + " crime stats")
    spearman = thinkstats2.SpearmanCorr(total, df[x])
    print("Spearman's correlation:", spearman)
    covariance = thinkstats2.Cov(total, df[x])
    print("Covariance:", covariance)
    print()

Exemple #21

0

Afficher le fichier

Fichier : timeseries.py Projet : crskbel-ca/PythonBayesianStatistics

def PlotDailies(dailies):
    """Save a three-row figure with one scatter of daily prices per entry.

    dailies: map from name to DataFrame
    """
    thinkplot.PrePlot(rows=3)

    for position, (name, daily) in enumerate(dailies.items(), start=1):
        thinkplot.SubPlot(position)

        # only the first subplot carries the title
        heading = 'price per gram ($)' if position == 1 else ''
        thinkplot.Config(ylim=[0, 20], title=heading)
        thinkplot.Scatter(daily.ppg, s=10, label=name)

        # only the third subplot keeps its x ticks, rotated
        if position != 3:
            thinkplot.Config(xticks=[])
        else:
            pyplot.xticks(rotation=30)

    thinkplot.Save(root='timeseries1', formats=FORMATS)

Exemple #22

0

Afficher le fichier

def PlotFit(live):
    """Save a scatter plot of birth weight against age with a fitted line.

    live: DataFrame with `agepreg` and `totalwgt_lb` columns
    """
    xs = live.agepreg
    ys = live.totalwgt_lb

    intercept, gradient = thinkstats2.LeastSquares(xs, ys)
    line_xs, line_ys = thinkstats2.FitLine(xs, intercept, gradient)

    thinkplot.Scatter(xs, ys, color='gray', alpha=0.1)
    # wide white stroke first, narrower blue stroke over it
    for stroke_color, stroke_width in (('white', 3), ('blue', 2)):
        thinkplot.Plot(line_xs, line_ys,
                       color=stroke_color, linewidth=stroke_width)

    thinkplot.Save(root='linear1',
                   xlabel='age (years)',
                   ylabel='birth weight (lbs)',
                   axis=[10, 45, 0, 15],
                   legend=False)

Exemple #23

0

Afficher le fichier

Fichier : agemodel.py Projet : wu12345/ThinkStats2

def MakeFigures(pool, firsts, others):
    """Creates several figures for the book.

    Saves five figures: the age CDF and weight CDF of `pool`, the paired age
    CDFs and paired weight CDFs of `firsts` and `others`, and a scatter plot
    of the ages and weights returned by GetAgeWeight(pool).

    pool, firsts, others: objects exposing `age_cdf` and `weight_cdf`
    """
    # CDF of all ages
    thinkplot.Clf()
    thinkplot.Cdf(pool.age_cdf)
    thinkplot.Save(root='agemodel_age_cdf',
                title="Distribution of mother's age",
                xlabel='age (years)',
                ylabel='CDF',
                legend=False)

    # CDF of all weights
    thinkplot.Clf()
    thinkplot.Cdf(pool.weight_cdf)
    thinkplot.Save(root='agemodel_weight_cdf',
                title="Distribution of birth weight",
                xlabel='birth weight (oz)',
                ylabel='CDF',
                legend=False)

    # plot CDFs of birth ages for first babies and others
    thinkplot.Clf()
    thinkplot.Cdfs([firsts.age_cdf, others.age_cdf])
    thinkplot.Save(root='agemodel_age_cdfs',
                title="Distribution of mother's age",
                xlabel='age (years)',
                ylabel='CDF')

    # plot CDFs of birth weights for first babies and others
    thinkplot.Clf()
    thinkplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    thinkplot.Save(root='agemodel_weight_cdfs',
                title="Distribution of birth weight",
                xlabel='birth weight (oz)',
                ylabel='CDF')

    # make a scatterplot of ages and weights
    ages, weights = GetAgeWeight(pool)
    # same capitalized Clf as every other figure in this function; the
    # original called the lowercase spelling here only
    thinkplot.Clf()
    thinkplot.Scatter(ages, weights, alpha=0.2)
    thinkplot.Save(root='agemodel_scatter',
                xlabel='Age (years)',
                ylabel='Birth weight (oz)',
                legend=False)

Exemple #24

0

Afficher le fichier

def main():
    """Generate correlated data, print its statistics and show a scatter plot.

    Seeds `random` with 17, gets 1000 pairs from CorrelatedGenerator with
    rho = -0.8, applies the linear transform a * x + b to the x values, then
    prints the mean/variance, covariance and Pearson/Spearman correlations.
    """
    random.seed(17)

    rho = -0.8
    res = CorrelatedGenerator(1000, rho)
    xs, ys = zip(*res)

    # with a = 1.0 and b = 0.0 the transform leaves the values unchanged;
    # edit these two to see how a linear transform affects the statistics
    a = 1.0
    b = 0.0
    xs = [a * x + b for x in xs]

    # single-argument print() calls produce the same text on Python 2 and
    # Python 3; the original print statements only parse on Python 2
    print('mean, var of x {}'.format(thinkstats2.MeanVar(xs)))
    print('mean, var of y {}'.format(thinkstats2.MeanVar(ys)))
    print('covariance {}'.format(thinkstats2.Cov(xs, ys)))
    print('Pearson corr {}'.format(thinkstats2.Corr(xs, ys)))
    print('Spearman corr {}'.format(thinkstats2.SpearmanCorr(xs, ys)))

    thinkplot.Scatter(xs, ys)
    thinkplot.Show()

Exemple #25

0

Afficher le fichier

def main():
    """Fit a line to generated data, print the fit statistics and plot it.

    Seeds `random` with 17, gets data from SatIqData(1000, 0.8), prints the
    mean/variance and Pearson correlation, the least-squares intercept and
    slope, and the coefficient of determination, then shows the fitted line
    with the scattered data.
    """
    random.seed(17)

    rho = 0.8
    xs, ys = SatIqData(1000, rho)

    # single-argument print() calls produce the same text on Python 2 and
    # Python 3; the original print statements only parse on Python 2
    print('mean, var of x {}'.format(thinkstats2.MeanVar(xs)))
    print('mean, var of y {}'.format(thinkstats2.MeanVar(ys)))
    print('Pearson corr {}'.format(thinkstats2.Corr(xs, ys)))

    inter, slope = thinkstats2.LeastSquares(xs, ys)
    print('inter {}'.format(inter))
    print('slope {}'.format(slope))

    fxs, fys = thinkstats2.FitLine(xs, inter, slope)
    res = thinkstats2.Residuals(xs, ys, inter, slope)
    R2 = thinkstats2.CoefDetermination(ys, res)
    print('R2 {}'.format(R2))

    thinkplot.Plot(fxs, fys, color='gray', alpha=0.2)
    thinkplot.Scatter(xs, ys)
    thinkplot.Show()

Exemple #26

0

Afficher le fichier

Fichier : Week10_chap12sol_12.1n12.2_rkarna.py Projet : rkarna/ThinkStats2

def PlotQuadraticModel(daily, name):
    """Fit the quadratic model to `daily` and save three figures.

    The figures are the fitted values ('Output_Timeseries1'), the residual
    percentiles ('Output_Timeseries2') and the predictions
    ('Output_Timeseries3').

    daily: object exposing `years` and `ppg` (presumably a DataFrame of
        daily prices)
    name: string used as the legend label
    """
    price_label = 'price per gram ($)'

    model, results = RunQuadraticModel(daily)
    regression.SummarizeResults(results)

    # figure 1: observed data plus the fitted curve
    timeseries.PlotFittedValues(model, results, label=name)
    thinkplot.Save(root='Output_Timeseries1', title='Fitted Val',
                   xlabel='yr', xlim=[-0.2, 4], ylabel=price_label)

    # figure 2: percentiles of the residuals
    timeseries.PlotResidualPercentiles(model, results)
    thinkplot.Save(root='Output_Timeseries2', title='Residual',
                   xlabel='yr', ylabel=price_label)

    # figure 3: predictions on a grid of 200 points from 0 to 10,
    # with a 0.1 margin on each side of the x axis
    years = np.linspace(0, 10, 200)
    first, last = years[0], years[-1]
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)
    thinkplot.Save(root='Output_Timeseries3', title='Predict',
                   xlabel='yr', xlim=[first - 0.1, last + 0.1],
                   ylabel=price_label)

Exemple #27

0

Afficher le fichier

#%%
# Plot the observed data with the model's fitted values.
# `model`, `results`, `name` and `daily` must already be defined earlier in
# the script.
timeseries.PlotFittedValues(model, results, label=name)
thinkplot.Config(title='Fitted Values',
                 xlabel='years',
                 xlim=[-0.1, 3.8],
                 ylabel='price ($)/gram')

#%%
# Plot the observed data with the quadratic model's predictions.

# evenly spaced grid of 101 points from 0 to 5 to predict over
years = np.linspace(0, 5, 101)

thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)

# leave a 0.1 margin on both sides of the grid
thinkplot.Config(title='predictions',
                 xlabel='years',
                 xlim=[years[0] - 0.1, years[-1] + 0.1],
                 ylabel='price ($)/gram')

#%% [markdown]
# ### Exercise 12.2
# Write a definition for a class named `SerialCorrelationTest` that extends `HypothesisTest` from Section 9.2. It should take a series and a lag as data, compute the serial correlation of the series with the given lag, and then compute the p-value of the observed correlation.
#
# Use this class to test whether the serial correlation in raw price data is statistically significant. Also test the residuals of the linear model and (if you did the previous exercise), the quadratic model.


#%%

Exemple #28

0

Afficher le fichier

# CDF of the Age column (`df` must already be defined)
cdf = thinkstats2.Cdf(df.Age)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Age', ylabel='CDF')

# Normal probability plot of Age. The gray "model" line has the sample mean
# as intercept and the sample standard deviation as slope, drawn over
# xs = [-4, 4]; the second curve comes from NormalProbability(df.Age).
mean = df.Age.mean()
std = df.Age.std()
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mean, slope=std)
thinkplot.Plot(fxs, fys, color='gray', label='model')
xs, ys = thinkstats2.NormalProbability(df.Age)
thinkplot.Plot(xs, ys, label='Age')

# Scatter plots and correlation
# Year vs. Age, with jitter (0.25) applied to Year before plotting
year = thinkstats2.Jitter(df.Year, .25)
thinkplot.Scatter(year, df.Age)
thinkplot.Show(xlabel='Year', ylabel='Age')
# NOTE(review): the correlation below is computed on the un-jittered Year,
# but its result is neither stored nor printed here
thinkstats2.Corr(df.Year, df.Age)
# Age vs. Drug
thinkplot.Scatter(df.Age, df.Drug)
thinkplot.Show(xlabel='Age', ylabel='Drug')

# Test for a difference in Age between the two groups. `male`, `female` and
# DiffMeansPermute must already be defined; the p-value is printed and the
# CDF from ht.PlotCdf() is plotted.
data = male.Age.values, female.Age.values
ht = DiffMeansPermute(data)
pvalue = ht.PValue()
print(pvalue)
ht.PlotCdf()
thinkplot.Config(xlabel='test statistic', ylabel='CDF')

Exemple #29

0

Afficher le fichier

# imports
from __future__ import print_function, division
%matplotlib inline
import numpy as np
import thinkstats2
import thinkplot

# get data
import first
live, firsts, others = first.MakeFrames()
# keep only the rows where both the age and the birth weight are present
live = live.dropna(subset=['agepreg', 'totalwgt_lb'])
ages = live.agepreg
weights = live.totalwgt_lb

# Scatter plot of birth weight versus mother's age
thinkplot.Scatter(ages, weights, alpha=1, s=10)
thinkplot.Config(xlabel='Age (years)',
                 ylabel='Weight (lbs)',
                 xlim=[10, 45],
                 ylim=[0, 15],
                 legend=False)
# RESULTS: messy plot

# Percentiles of birth weight versus mother's age:
# bin edges at 10, 15, ..., 40; each row is assigned the index of its age
# bin and the rows are grouped by that index
bins = np.arange(10, 45, 5)
indices = np.digitize(live.agepreg, bins)
groups = live.groupby(indices)

# per age bin: the mean age, and the CDF of birth weight
mean_ages = [group.agepreg.mean() for i, group in groups]
cdfs_wgt= [thinkstats2.Cdf(group.totalwgt_lb) for i, group in groups]

Exemple #30

0

Afficher le fichier

Fichier : Mayasa_Dablan_cs240_project.py Projet : MayasaD/cs-project-

        meanx = np.mean(xs)
    if meany is None:
        meany = np.mean(ys)

    cov = np.dot(xs-meanx, ys-meany) / len(xs)
    return cov


# In[65]:

def Jitter(values, jitter=0.5):
    """Return `values` with zero-mean normal noise added to each element.

    values: sequence of numbers
    jitter: standard deviation of the noise

    returns: NumPy array with the same length as `values`
    """
    noise = np.random.normal(0, jitter, len(values))
    return noise + values
# Jitter both series before plotting (`wins` and `runs` must already be
# defined).
# NOTE(review): despite their names, `heights` holds the jittered wins and
# `weights` the jittered runs.
heights = Jitter(wins, 1.4)
weights = Jitter(runs, 0.5)
# wins on the x axis, runs on the y axis
thinkplot.Scatter(heights, weights, alpha=0.9, s=40,color='blue')
thinkplot.Config(xlabel='Wins',
                 ylabel='Runs',
                 axis=[0, 210, 20, 200],
                 legend=False)


# In[66]:

# Part five: compute the covariance of the two samples, which is needed in
# order to compute the correlation. The result is not stored here.
Cov(wins_sample, runs_sample)


# In[67]: