Python Cdf Examples, empiricaldist.Cdf Python Examples

Example #1

0

Show file

def plot_cdf(T, S):
    """Plot the empirical CDFs of two sequences side by side.

    T: sequence plotted in the left panel (x axis "Avalanche duration")
    S: sequence plotted in the right panel (x axis "Avalanche size")

    The figure is written to "cdf_plot.png" in the working directory and
    then shown on screen.
    """
    cdfT = Cdf.from_seq(T)
    cdfS = Cdf.from_seq(S)

    fig = plt.figure(figsize=(10, 5))
    # pyplot.show() takes no title argument (its only parameter is the
    # keyword `block`), so the heading is set on the figure itself.
    fig.suptitle('PMF size and duration')

    plt.subplot(1, 2, 1)
    cdfT.plot(xlim=(0, 50), xlabel="Avalanche duration", ylabel="CDF")

    plt.subplot(1, 2, 2)
    cdfS.plot(xlim=(0, 50), xlabel="Avalanche size", ylabel="CDF")

    # Write the file first so saving does not depend on the state of the
    # figure after the interactive window has been closed.
    fig.savefig("cdf_plot.png")
    plt.show()

Example #2

0

Show file

File: test_empiricaldist.py Project: levatnov00/empiricaldist

    def testHazard(self):
        """Check Hazard evaluation, directly and via Cdf/Surv conversion."""
        seq = [1, 2, 2, 3, 5]
        haz = Hazard.from_seq(seq)

        # () uses forward to interpolate
        scalar_cases = [(1, 0.2), (2, 0.5), (3, 0.5), (4, 0), (5, 1.0), (6, 0)]
        for q, wanted in scalar_cases:
            self.assertAlmostEqual(haz(q), wanted)

        xs = [0, 1, 2, 3, 4, 5, 6]
        wanted_ys = [0, 0.2, 0.5, 0.5, 0, 1, 0]

        def check(hazard):
            # Evaluate the whole grid in one call, then compare pairwise.
            for got, wanted in zip(hazard(xs), wanted_ys):
                self.assertAlmostEqual(got, wanted)

        # The same values are expected from the Hazard built directly and
        # from the ones derived from a Cdf and from a Surv.
        check(haz)
        check(Cdf.from_seq(seq).make_hazard())
        check(Surv.from_seq(seq).make_hazard())

Example #3

0

Show file

def comparing_cdfs(log_income, dist):
    """Plot a model CDF in gray together with the empirical CDF of the data.

    log_income: values whose empirical CDF is drawn
    dist: object with a cdf(xs) method, evaluated on np.linspace(2, 5.5)
    """
    # Model curve on an evenly spaced grid
    grid = np.linspace(2, 5.5)
    model_cdf = dist.cdf(grid)

    # Start from an empty figure and draw the model first
    plt.clf()
    plt.plot(grid, model_cdf, color='gray')

    # Empirical curve on top of it
    data_cdf = Cdf.from_seq(log_income)
    data_cdf.plot()

    # Axis labels, then display
    plt.xlabel('log10 of realinc')
    plt.ylabel('CDF')
    plt.show()

Example #4

0

Show file

File: test_empiricaldist.py Project: levatnov00/empiricaldist

    def testCdf(self):
        """Exercise Cdf lookup, evaluation, inversion and seeded choice."""
        # if the quantities are not numeric, you can use [] but not ()
        letters = Cdf.from_seq(list('allen'))
        for key, prob in zip('aeln', (0.2, 0.4, 0.8, 1.0)):
            self.assertAlmostEqual(letters[key], prob)

        cdf = Cdf.from_seq([1, 2, 2, 3, 5])

        # () uses forward to interpolate
        self.assertEqual(cdf(0), 0)
        scalar_cases = [(1, 0.2), (2, 0.6), (3, 0.8), (4, 0.8), (5, 1), (6, 1)]
        for q, prob in scalar_cases:
            self.assertAlmostEqual(cdf(q), prob)

        # Evaluating a whole range at once gives the same step function.
        wanted_ps = [0, 0, 0.2, 0.6, 0.8, 0.8, 1, 1]
        for got, wanted in zip(cdf(range(-1, 7)), wanted_ps):
            self.assertAlmostEqual(got, wanted)

        # Scalar inverse lookups: (probability, expected quantity)
        inverse_cases = [
            (0, 1),
            (0.1, 1),
            (0.2, 1),
            (0.3, 2),
            (0.4, 2),
            (0.5, 2),
            (0.6, 2),
            (0.7, 3),
            (0.8, 3),
            (0.9, 5),
            (0.99999, 5),
            (1, 5),
        ]
        for p, q in inverse_cases:
            self.assertEqual(cdf.inverse(p), q)

        # Inverse lookup for a list of probabilities in one call
        ps = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        qs = cdf.inverse(ps)
        self.assertTrue((qs == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

        # Seeded draw with replacement
        np.random.seed(42)
        drawn = cdf.choice(7, replace=True)
        self.assertListEqual(drawn.tolist(), [2, 5, 3, 2, 1, 1, 1])

Example #5

0

Show file

File: ch09_sugarscape.py Project: smithb16/ThinkComplexity2

def metabolism_distribution(env):
    """Plot the CDF of agent metabolism and show it with a blocking show.

    env: Sugarscape
    """
    metabolisms = (agent.metabolism for agent in env.agents)
    Cdf.from_seq(metabolisms).plot()
    decorate(xlabel='Metabolism', ylabel='CDF')
    plt.show(block=True)

Example #6

0

Show file

File: ch09_sugarscape.py Project: smithb16/ThinkComplexity2

def vision_distribution(env):
    """Plot the CDF of agent vision and show it with a blocking show.

    env: Sugarscape
    """
    visions = (agent.vision for agent in env.agents)
    Cdf.from_seq(visions).plot()
    decorate(xlabel='Vision', ylabel='CDF')
    plt.show(block=True)

Example #7

0

Show file

File: 21_pmf_and_cdf.py Project: printfCRLF/pp

def make_a_cdf(gss):
    """Print the value of the 'age' column's Cdf at the quantity 30.0."""
    # Values of the age column
    ages = gss['age'].values

    # Build the CDF and look up the entry for 30
    print(Cdf.from_seq(ages)[30.0])

Example #8

0

Show file

File: test_empiricaldist.py Project: TynClause/empiricaldist

    def testCdfSampling(self):
        """Check that seeded choice() and sample() give the expected draws."""
        dist = Cdf.from_seq([1, 2, 3, 4, 5, 6])
        wanted = [2, 4, 2, 1, 5, 4, 4, 4, 1, 3]

        # choice() is seeded through the global NumPy state
        np.random.seed(17)
        from_choice = dist.choice(10)
        self.assertTrue(np.all(from_choice == wanted))

        # sample() receives the seed explicitly
        from_sample = dist.sample(10, replace=True, random_state=17)
        self.assertTrue(np.all(from_sample == wanted))

Example #9

0

Show file

def plot_fitnesses(sim):
    """Print the mean fitness and plot the CDF of fitnesses.

    sim: Simulation object
    """
    fitnesses = sim.get_fitnesses()
    fitness_cdf = Cdf.from_seq(fitnesses)

    print('Mean fitness\n', np.mean(fitnesses))

    fitness_cdf.plot()
    decorate(xlabel='Fitness', ylabel='CDF')
    plt.show(block=True)

Example #10

0

Show file

File: test_empiricaldist.py Project: TynClause/empiricaldist

    def testNormalize(self):
        """Check normalize() on an unnormalized Pmf and Cdf of nine items."""
        seq = [0, 1, 2, 3, 3, 4, 4, 4, 5]

        # Pmf: normalize() reports 9 and leaves 2/9 at quantity 3
        raw_pmf = Pmf.from_seq(seq, normalize=False)
        self.assertAlmostEqual(raw_pmf.normalize(), 9)
        self.assertAlmostEqual(raw_pmf[3], 0.22222222)

        # Cdf: normalize() reports 9 and the value at 3 becomes 5/9
        raw_cdf = Cdf.from_seq(seq, normalize=False)
        self.assertAlmostEqual(raw_cdf.normalize(), 9)
        self.assertAlmostEqual(raw_cdf(3), 0.55555555)

Example #11

0

Show file

File: 21_pmf_and_cdf.py Project: printfCRLF/pp

def plot_a_cdf(gss):
    """Plot the empirical CDF of the 'realinc' column with labelled axes."""
    # Build the CDF straight from the column values and draw it
    realinc = gss["realinc"].values
    Cdf.from_seq(realinc).plot()

    # Axis labels, then display
    plt.xlabel('Income (1986 USD)')
    plt.ylabel('CDF')
    plt.show()

Example #12

0

Show file

File: 21_pmf_and_cdf.py Project: printfCRLF/pp

def compute_iqr(gss):
    """Print the interquartile range of the 'realinc' column."""
    cdf_income = Cdf.from_seq(gss["realinc"].values)

    # 75th percentile first, then 25th; their difference is the IQR
    upper, lower = (cdf_income.inverse(p) for p in (0.75, 0.25))

    print(upper - lower)

Example #13

0

Show file

File: ch09_sugarscape.py Project: smithb16/ThinkComplexity2

def wealth_distribution(env, plot=True):
    """Print selected wealth quantiles and optionally plot the sugar CDF.

    env: Sugarscape
    plot: when true, draw the CDF and show it with a blocking show

    returns: Cdf built from the agents' sugar
    """
    cdf = Cdf.from_seq(agent.sugar for agent in env.agents)

    # One output line per quantile, written in two parts
    for q in (0.25, 0.5, 0.75, 0.9):
        heading = 'Wealth of {:.0%}'.format(q)
        print(heading, end='')
        print(': %i' % cdf.quantile(q))

    if not plot:
        return cdf

    cdf.plot()
    decorate(xlabel='Wealth', ylabel='CDF')
    plt.show(block=True)
    return cdf

Example #14

0

Show file

def plot_income_cdfs(gss, high, assc, bach):
    """Overlay the 'realinc' CDFs of three groups on one labelled plot.

    gss: table with a 'realinc' column
    high, assc, bach: selectors used to index that column for the
        'High school', 'Associate' and 'Bachelor' curves
        (presumably boolean masks; confirm against the caller)
    """
    income = gss['realinc']

    # One curve per group, drawn in this order
    groups = (
        (high, 'High school'),
        (assc, 'Associate'),
        (bach, 'Bachelor'),
    )
    for selector, label in groups:
        Cdf.from_seq(income[selector]).plot(label=label)

    # Axis labels and legend, then display
    plt.xlabel('Income (1986 USD)')
    plt.ylabel('CDF')
    plt.legend()
    plt.show()

Example #15

0

Show file

File: test_empiricaldist.py Project: TynClause/empiricaldist

    def testCdfComparison(self):
        """Check the *_dist comparison methods against a scalar and itself."""
        d4 = Cdf.from_seq(range(1, 5))

        # (method name, expected result vs. 2, expected result vs. d4)
        cases = [
            ('gt_dist', 0.5, 0.375),
            ('lt_dist', 0.25, 0.375),
            ('ge_dist', 0.75, 0.625),
            ('le_dist', 0.5, 0.625),
            ('eq_dist', 0.25, 0.25),
            ('ne_dist', 0.75, 0.75),
        ]
        for method_name, vs_scalar, vs_self in cases:
            compare = getattr(d4, method_name)
            self.assertEqual(compare(2), vs_scalar)
            self.assertEqual(compare(d4), vs_self)

Example #16

0

Show file

File: test_empiricaldist.py Project: TynClause/empiricaldist

    def testConversionFunctions(self):
        """Convert between Pmf, Cdf, Surv and Hazard and compare the results."""
        seq = [1, 2, 2, 3, 5, 5, 7, 10]
        pmf, cdf, surv, haz = (
            kind.from_seq(seq) for kind in (Pmf, Cdf, Surv, Hazard)
        )

        # Each representation derived from the Pmf is compared with the one
        # built directly from the sequence.
        self.almost_equal_dist(cdf, pmf.make_cdf())
        self.almost_equal_dist(surv, pmf.make_surv())

        hazard_from_pmf = pmf.make_hazard()
        self.almost_equal_dist(haz, hazard_from_pmf)

        # The derived Hazard is converted back to the other three forms.
        self.almost_equal_dist(surv, hazard_from_pmf.make_surv())
        self.almost_equal_dist(cdf, hazard_from_pmf.make_cdf())
        self.almost_equal_dist(pmf, hazard_from_pmf.make_pmf())

Example #17

0

Show file

File: lqh_st.py Project: mcuevasg/Prueba_Canal

    # Build a line chart of the hogar_canales columns against 'Fecha' from
    # the data frame that matches the selected option; any option other than
    # the three named ones falls back to off2.
    if select == 'Prime':
        fig = px.line(prime, x='Fecha', y=hogar_canales)
    elif select == 'Prime Segunda Franja':
        fig = px.line(prime2, x='Fecha', y=hogar_canales)
    elif select == 'Off Prime PM':
        fig = px.line(offprime, x='Fecha', y=hogar_canales)
    else:
        fig = px.line(off2, x='Fecha', y=hogar_canales)

    # Hand the chosen figure to st.plotly_chart for display.
    st.plotly_chart(fig)

# fig_bar=px.bar(salida_Franja,x='Franja',y=('SH_C13','mean'))
# st.plotly_chart(fig_bar)
##########################   DISTRIBUTION FUNCTIONS   ######################################################################
# Cdfs of the 'SH_C13' column for the prime and off-prime data frames.
cdf_p = Cdf.from_seq(prime['SH_C13'])
cdf_o = Cdf.from_seq(offprime['SH_C13'])

# Index and values of the prime Cdf.
x = np.array(cdf_p.index)
y = cdf_p.values

# Smallest and largest index entries, rounded to integers; they are the
# bounds of both sliders below.
min_x = int(np.around(x.min()))
max_x = int(np.around(x.max()))

# Two sidebar sliders over the same [min_x, max_x] range.
share_min = st.sidebar.slider("Share Hogar Minimo ", min_x, max_x)
share_max = st.sidebar.slider("Share Hogar Maximo ", min_x, max_x)

#probabilidad_1=round((cdf(share_cdf_2))*100,1)
#probabilidad_2=round((1-cdf(share_cdf_2))*100,1)

# Section heading for what follows.
st.markdown("## Probabilidad Share Hogar*")

Example #18

0

Show file

File: test_empiricaldist.py Project: TynClause/empiricaldist

 def testPmfFromCdf(self):
     """Check that Cdf.make_pmf() reproduces the Pmf of the same sequence."""
     seq = [1, 2, 2, 3, 5]
     direct = Pmf.from_seq(seq)
     via_cdf = Cdf.from_seq(seq).make_pmf()
     self.almost_equal_dist(direct, via_cdf)

Example #19

0

Show file

File: ch4.py Project: jaredparmer/ThinkComplexity

##pmf_ba.plot(label='BA model', color='C2', **options)
##plt.xlabel('Degree')
##plt.xscale('log')
##plt.yscale('log')
##plt.legend()
##
##plt.savefig('figs/chap04-3')
##plt.close()

# using Downey's code to make a BA graph and seeing how it works
##print("Constructing BA(20, 3) graph")
##ba_bespoke = barabasi_albert_graph(20, 3)
##nx.draw_circular(ba_bespoke, node_size=700, with_labels=True)
##plt.show()
""" now use cumulative distribution function objects to represent the data """
# One named Cdf per degree sequence: 'facebook', 'WS model' and 'BA model'.
# `degrees`, `fb`, `ws` and `ba` are not defined in the lines shown here.
cdf_fb = Cdf.from_seq(degrees(fb), name='facebook')
cdf_ws = Cdf.from_seq(degrees(ws), name='WS model')
cdf_ba = Cdf.from_seq(degrees(ba), name='BA model')

# now plot the models on log-x scale to compare with the fb data
##plt.figure(figsize=(8,4.5))
##plt.subplot(1,2,1)
##cdf_fb.plot(color='C0')
##cdf_ws.plot(color='C1')
##plt.xlabel('Degree')
##plt.xscale('log')
##plt.ylabel('CDF')
##plt.legend()
##
##plt.subplot(1,2,2)
##cdf_fb.plot(color='C0')

Example #20

0

Show file

    """
    ## Read data from Facebook file
    ## NOTE(review): absolute path under one user's home directory; it will
    ## not resolve on other machines.
    dirname = '/Users/bensmith/Documents/ThinkSeries/ThinkComplexity2/data/'
    fin = dirname + 'facebook_combined.txt.gz'
    fb = read_graph(fin)

    n, m, k, pmf_fb = analyze_graph(fb, verbose=True)
    print('pmf_fb:\n',type(pmf_fb))

    ## Build ws & ba models that closely represent Facebook data
    ## (all three generators get the n and k reported for fb and seed=15)
    ws = nx.watts_strogatz_graph(n, k, 0.05, seed=15)
    ba = nx.barabasi_albert_graph(n, k, seed=15)
    hk = generate_hk_graph(n, k, 1, seed=15)

    ## Generate CDFs of the four graphs (Facebook data plus three models)
    cdf_fb = Cdf.from_seq(degrees(fb), name='Facebook')
    cdf_ws = Cdf.from_seq(degrees(ws), name='Watts-Strogatz')
    cdf_ba = Cdf.from_seq(degrees(ba), name='Barabasi-Albert')
    cdf_hk = Cdf.from_seq(degrees(hk), name='Holme-Kim')

    ## Generate HK graph that mimics Facebook data
    ## ps: nine values log-spaced from 1e-4 to 1
    ps = np.logspace(-4, 0, 9)

    ## NOTE(review): the loop rebinds n, m and k from analyze_graph(G), so
    ## every iteration after the first calls generate_hk_graph with the
    ## values reported for the previous G rather than those of fb. Confirm
    ## this is intended.
    for p in ps:
        G = generate_hk_graph(n, k, p)
        print('\np: ',p)
        n, m, k, pmf_hk = analyze_graph(G, verbose=True)

    ## Generate figures comparing degree of facebook to degree of WS & BA models
    plt.figure(figsize=(8,4))

Example #21

0

Show file

File: Models_Flow.py Project: Sathishvp7/CodeBook_Regression

df.dtypes # data type of each column (bare expression: the result is not stored)
# NOTE(review): if df is a pandas DataFrame, info() prints its summary and
# returns None, so info_df ends up holding None - confirm this is intended.
info_df = df.info() # summary such as dtypes and number of null values
describe = df.describe()

# Count the number of int, float and object columns in the dataset
count_dtypes = df.dtypes.value_counts()

# Note 1 - A model learns better when its input is numeric

# Now we are going to find the PMF values
from empiricaldist import Pmf,Cdf
# pmf - probability mass function - probability of a particular value of the variable.
# cdf - cumulative distribution function - sum of the probabilities of all values up to a given value
sp = df.SalePrice
# Table with the Pmf and the Cdf of SalePrice as columns, indexed by sp and
# sorted by 'SalePrice'.
Pmf_SalePrice = pd.DataFrame(data= {'Probablity_Mass_Function': Pmf.from_seq(sp),
                                    'Cummulative_Mass_Function' : Cdf.from_seq(sp)},
                                    index= sp).sort_values(['SalePrice'])

# Visualization of the cdf
# Note 2
'''CDF helps to understand how may precent of the total data 
is below or above a specified threshold'''
cdf = Cdf.from_seq(sp)
cdf.plot()

# 4. DATA WRANGLING
'''
Inspecting missing values in each variables and trying to impute statistically acceptable values.
Detect outliers and remove those records.
Remove irrelevant records. Ex. Records with negative age etc
'''

Example #22

0

Show file

def get_cdfs(y_true: pd.Series, y_pred: pd.Series):
    """Build the empirical CDFs of the true and the predicted values.

    Args:
        y_true: series of true values.
        y_pred: series of predicted values.

    Returns:
        A ``(y_true_cdf, y_pred_cdf)`` tuple; each element is the result of
        ``CDF.from_seq`` applied to the corresponding input after it has
        been passed through ``flatten_values``.
    """
    y_true_cdf = CDF.from_seq(flatten_values(y_true))
    y_pred_cdf = CDF.from_seq(flatten_values(y_pred))
    # Return the distributions themselves rather than the flattened inputs
    # they were built from.
    return y_true_cdf, y_pred_cdf