Python Jitter Examples

Programming Language: Python

Namespace/Package Name: thinkstats2

Method/Function: Jitter

Examples at hotexamples.com: 4

Python Jitter - 4 examples found. These are the top-rated real-world Python examples of thinkstats2.Jitter extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def GetArrivalDepartureDelay(flights, hjitter=0.0, wjitter=0.0):
    """Get sequences of arrival delays and departure delays.

    flights: object exposing ARRIVAL_DELAY and DEPARTURE_DELAY attributes
             (presumably a DataFrame of flight records -- confirm with caller)
    hjitter: float handed to thinkstats2.Jitter for the arrival delays;
             a falsy value (the default) returns them untouched
    wjitter: float handed to thinkstats2.Jitter for the departure delays;
             a falsy value (the default) returns them untouched

    returns: tuple of sequences (arrival delays, departure delays)
    """
    # Arrival delays are handled first so that any jitter is drawn in the
    # same order for both columns on every call.
    raw_arrivals = flights.ARRIVAL_DELAY
    arrivals = (thinkstats2.Jitter(raw_arrivals, hjitter)
                if hjitter else raw_arrivals)

    raw_departures = flights.DEPARTURE_DELAY
    departures = (thinkstats2.Jitter(raw_departures, wjitter)
                  if wjitter else raw_departures)

    return arrivals, departures

Example #2

Show file

File: scatter.py Project: avinashalapati09/dsc530

def GetHeightWeight(df, hjitter=0.0, wjitter=0.0):
    """Get sequences of height and weight.

    df: DataFrame with htm3 and wtkg2
    hjitter: float handed to thinkstats2.Jitter for the heights;
             a falsy value (the default) returns them untouched
    wjitter: float handed to thinkstats2.Jitter for the weights;
             a falsy value (the default) returns them untouched

    returns: tuple of sequences (heights, weights)
    """
    # Heights are processed before weights so any jitter is drawn in a
    # fixed order.
    raw_heights = df.htm3
    heights = (thinkstats2.Jitter(raw_heights, hjitter)
               if hjitter else raw_heights)

    raw_weights = df.wtkg2
    weights = (thinkstats2.Jitter(raw_weights, wjitter)
               if wjitter else raw_weights)

    return heights, weights

Example #3

Show file

# Exploratory script: distribution of Age, its fit to a normal model,
# scatter plots against Year and Drug, and a permutation test between two
# groups.  `df`, `male`, `female` and `DiffMeansPermute` are not defined in
# this excerpt and must already be in scope.

# Empirical CDF of the Age column.
cdf = thinkstats2.Cdf(df.Age)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Age', ylabel='CDF')

# Normal probability plot: the gray "model" line has intercept = sample mean
# and slope = sample standard deviation, evaluated at xs = -4 and 4.
mean = df.Age.mean()
std = df.Age.std()
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mean, slope=std)
thinkplot.Plot(fxs, fys, color='gray', label='model')
# NOTE: `xs` is rebound here; the [-4, 4] endpoints above are no longer
# available under that name after this line.
xs, ys = thinkstats2.NormalProbability(df.Age)
thinkplot.Plot(xs, ys, label='Age')

# Scatter plots and correlation.
# Year vs. age: only Year is jittered (by .25) before plotting.
year = thinkstats2.Jitter(df.Year, .25)
thinkplot.Scatter(year, df.Age)
thinkplot.Show(xlabel='Year', ylabel='Age')
# NOTE(review): the correlation is computed on the un-jittered columns and
# its return value is discarded here -- confirm whether it should be
# printed or stored.
thinkstats2.Corr(df.Year, df.Age)
# Drug vs. age (no jitter applied).
thinkplot.Scatter(df.Age, df.Drug)
thinkplot.Show(xlabel='Age', ylabel='Drug')

# Testing a difference between genders: the Age values of the two groups
# are handed to DiffMeansPermute, whose p-value is printed and whose CDF
# is plotted via PlotCdf.
data = male.Age.values, female.Age.values
ht = DiffMeansPermute(data)
pvalue = ht.PValue()
print(pvalue)
ht.PlotCdf()
thinkplot.Config(xlabel='test statistic', ylabel='CDF')

Example #4

Show file

def _cdf_bin_edges(lower, upper, nbins):
    """Return the bin edges used to split xlabel values for the CDF subplot.

    lower: lowest edge
    upper: exclusive upper limit of the edges
    nbins: approximate number of groups wanted, counting the two open-ended
           groups below the first edge and above the last one

    returns: 1-D NumPy array holding at least one edge
    """
    span = upper - lower
    if span <= 0:
        # Degenerate range: np.arange would return no edges at all.  A single
        # edge still produces well-defined "<" and ">" groups.
        return np.array([lower])

    # nbins <= 2 would otherwise divide by zero or yield a negative step.
    interior = max(nbins - 2, 1)
    step = span // interior
    if step <= 0:
        # The range is narrower than the interior bin count, so floor
        # division collapses to zero, which np.arange cannot use as a step.
        step = span / interior
    return np.arange(lower, upper, step)


def _cdf_group_label(index, bins, xlabel):
    """Return the legend label for the group with np.digitize index `index`.

    index: value produced by np.digitize(values, bins); 0 means below
           bins[0] and len(bins) means at or above bins[-1]
    bins: non-empty sequence of bin edges
    xlabel: string appended to the numeric range

    returns: string label
    """
    if index == 0:
        return '<%d ' % bins[0] + xlabel
    if index == len(bins):
        return '>%d ' % bins[-1] + xlabel
    return '%d - %d ' % (bins[index - 1], bins[index]) + xlabel


def CorrelationPlots(df,
                     xlabel,
                     ylabel,
                     xjitter=0,
                     yjitter=0,
                     axis=None,
                     nbins=5,
                     **options):
    """Draw a 2x2 figure relating two columns and print their correlations.

    The subplots are: (1) a scatter plot, (2) a hexbin plot, (3) the 75th,
    50th and 25th percentiles of ylabel within bins of xlabel, and (4) one
    CDF of ylabel per bin of xlabel.  Rows with a missing value in either
    column are dropped first.

    PrePlot, SubPlot, Scatter, HexBin, Plot, Cdf and Config are called
    unqualified and must be in scope in this module (presumably the
    thinkplot functions of the same names -- confirm).

    df: DataFrame containing the columns xlabel and ylabel
    xlabel: string name of the column used for the x values
    ylabel: string name of the column used for the y values
    xjitter: float handed to thinkstats2.Jitter for the x values
    yjitter: float handed to thinkstats2.Jitter for the y values
    axis: sequence [xmin, xmax, ymin, ymax] passed to Config for the first
          three subplots; defaults to the range of the jittered data
    nbins: used as the bin WIDTH (np.arange step) for the percentile
           subplot, and as the approximate NUMBER of groups for the CDF
           subplot
    options: accepted for call compatibility; not used by this function

    returns: None
    """
    cleaned = df.dropna(subset=[xlabel, ylabel])
    xs = cleaned[xlabel]
    ys = cleaned[ylabel]

    xs = thinkstats2.Jitter(xs, xjitter)
    ys = thinkstats2.Jitter(ys, yjitter)

    xmin, xmax = min(xs), max(xs)
    ymin, ymax = min(ys), max(ys)
    if axis is None:
        axis = [xmin, xmax, ymin, ymax]

    PrePlot(num=4, rows=2, cols=2)

    # make scatter plot
    SubPlot(1)
    Scatter(xs, ys, alpha=0.1, s=10)
    Config(xlabel=xlabel, ylabel=ylabel, axis=axis, legend=False)

    # make HexBin plot
    SubPlot(2)
    HexBin(xs, ys)
    Config(xlabel=xlabel, ylabel=ylabel, axis=axis, legend=False)

    # plot percentiles
    SubPlot(3)

    # Bin between the 1st and 99th percentiles of the jittered x values.
    xs_cdf = thinkstats2.Cdf(xs)
    lower = xs_cdf.Percentile(1)
    upper = xs_cdf.Percentile(99)

    # Here nbins is the step between consecutive edges, not a count.
    bins = np.arange(lower, upper, nbins)
    indices = np.digitize(xs, bins)
    groups = cleaned.groupby(indices)
    mean_xs = [group[xlabel].mean() for i, group in groups]
    cdfs = [thinkstats2.Cdf(group[ylabel]) for i, group in groups]

    for percent in [75, 50, 25]:
        y_percentiles = [cdf.Percentile(percent) for cdf in cdfs]
        label = '%dth' % percent
        Plot(mean_xs, y_percentiles, label=label)

    Config(xlabel=xlabel, ylabel=ylabel, axis=axis, legend=True)

    # plot CDFs
    # Unlike the percentile subplot, groups are formed from the un-jittered
    # column values.
    bins = _cdf_bin_edges(lower, upper, nbins)
    indices = np.digitize(cleaned[xlabel], bins)
    groups = cleaned.groupby(indices)
    # Keep each group's digitize index with its CDF: an empty bin produces
    # no group, so the position in the list cannot identify the bin.
    keyed_cdfs = [(index, thinkstats2.Cdf(group[ylabel]))
                  for index, group in groups]

    ## plot the cdfs
    SubPlot(4)
    PrePlot(len(keyed_cdfs))
    for index, cdf in keyed_cdfs:
        Cdf(cdf, label=_cdf_group_label(index, bins, xlabel))
    # Configure once, after every CDF has been drawn.
    Config(xlabel=ylabel, ylabel='CDF', legend=True)

    #print statistics
    print('Correlation:\n', thinkstats2.Corr(xs, ys))
    print('Spearman Correlation Coefficient:\n',
          thinkstats2.SpearmanCorr(xs, ys))