Python Cdf Exemples, empiricaldist.Cdf Python Exemples

Exemple #1

0

Afficher le fichier

def plot_cdf(T, S):
    """Plot the empirical CDFs of avalanche durations and sizes side by side.

    T: sequence of avalanche durations
    S: sequence of avalanche sizes

    The two-panel figure is written to "cdf_plot.png" and then displayed.
    """
    cdfT = Cdf.from_seq(T)
    cdfS = Cdf.from_seq(S)

    fig = plt.figure(figsize=(10, 5))

    # Left panel: duration CDF
    plt.subplot(1, 2, 1)
    cdfT.plot(xlim=(0, 50), xlabel="Avalanche duration", ylabel="CDF")

    # Right panel: size CDF
    plt.subplot(1, 2, 2)
    cdfS.plot(xlim=(0, 50), xlabel="Avalanche size", ylabel="CDF")

    # Save before showing, so the file is written even if the blocking
    # show() call is interrupted or the window is closed.
    fig.savefig("cdf_plot.png")

    # plt.show() accepts only the `block` flag; the title-like string that
    # used to be passed here was never rendered anywhere, so it is dropped.
    plt.show()

Exemple #2

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : levatnov00/empiricaldist

    def testHazard(self):
        """Check Hazard evaluation and the Cdf/Surv -> Hazard conversions."""
        data = [1, 2, 2, 3, 5]
        hazard = Hazard.from_seq(data)

        # () uses forward to interpolate
        scalar_cases = [(1, 0.2), (2, 0.5), (3, 0.5), (4, 0), (5, 1.0), (6, 0)]
        for quantity, expected in scalar_cases:
            self.assertAlmostEqual(hazard(quantity), expected)

        quantities = [0, 1, 2, 3, 4, 5, 6]
        expected_hazards = [0, 0.2, 0.5, 0.5, 0, 1, 0]

        def check(dist):
            # Evaluate the whole sequence at once and compare element-wise.
            for actual, expected in zip(dist(quantities), expected_hazards):
                self.assertAlmostEqual(actual, expected)

        # The directly built Hazard and the ones derived from a Cdf and a
        # Surv must all agree on the same quantities.
        check(hazard)
        check(Cdf.from_seq(data).make_hazard())
        check(Surv.from_seq(data).make_hazard())

Exemple #3

0

Afficher le fichier

def comparing_cdfs(log_income, dist):
    """Overlay the model CDF of `dist` on the empirical CDF of `log_income`.

    log_income: sequence of values passed to Cdf.from_seq
    dist: object exposing a `cdf(xs)` method (the model distribution)
    """
    # Model curve, evaluated on an evenly spaced grid over [2, 5.5]
    grid = np.linspace(2, 5.5)
    model_probs = dist.cdf(grid)

    # Start from a clean figure, then draw the model in gray
    plt.clf()
    plt.plot(grid, model_probs, color='gray')

    # Empirical curve on top of the model
    empirical = Cdf.from_seq(log_income)
    empirical.plot()

    plt.xlabel('log10 of realinc')
    plt.ylabel('CDF')
    plt.show()

Exemple #4

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : levatnov00/empiricaldist

    def testCdf(self):
        """Exercise Cdf bracket lookup, forward evaluation, inverse and choice."""
        # if the quantities are not numeric, you can use [] but not ()
        letters = Cdf.from_seq(list('allen'))
        for letter, expected in [('a', 0.2), ('e', 0.4), ('l', 0.8), ('n', 1.0)]:
            self.assertAlmostEqual(letters[letter], expected)

        cdf = Cdf.from_seq([1, 2, 2, 3, 5])

        # () uses forward to interpolate
        self.assertEqual(cdf(0), 0)
        forward_cases = [(1, 0.2), (2, 0.6), (3, 0.8), (4, 0.8), (5, 1), (6, 1)]
        for quantity, expected in forward_cases:
            self.assertAlmostEqual(cdf(quantity), expected)

        # Forward evaluation of a whole sequence, including values below the
        # smallest and above the largest quantity.
        expected_probs = [0, 0, 0.2, 0.6, 0.8, 0.8, 1, 1]
        for actual, expected in zip(cdf(range(-1, 7)), expected_probs):
            self.assertAlmostEqual(actual, expected)

        # Inverse lookup, one probability at a time
        inverse_cases = [
            (0, 1),
            (0.1, 1),
            (0.2, 1),
            (0.3, 2),
            (0.4, 2),
            (0.5, 2),
            (0.6, 2),
            (0.7, 3),
            (0.8, 3),
            (0.9, 5),
            (0.99999, 5),
            (1, 5),
        ]
        for prob, expected in inverse_cases:
            self.assertEqual(cdf.inverse(prob), expected)

        # Inverse lookup of a whole sequence
        probs = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        quantiles = cdf.inverse(probs)
        self.assertTrue((quantiles == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

        # Seeded sampling must be reproducible
        np.random.seed(42)
        sample = cdf.choice(7, replace=True)
        self.assertListEqual(sample.tolist(), [2, 5, 3, 2, 1, 1, 1])

Exemple #5

0

Afficher le fichier

Fichier : ch09_sugarscape.py Projet : smithb16/ThinkComplexity2

def metabolism_distribution(env):
    """Plot the CDF of the agents' metabolism and block until it is closed.

    env: Sugarscape
    """
    metabolisms = (agent.metabolism for agent in env.agents)
    Cdf.from_seq(metabolisms).plot()
    decorate(xlabel='Metabolism', ylabel='CDF')
    plt.show(block=True)

Exemple #6

0

Afficher le fichier

Fichier : ch09_sugarscape.py Projet : smithb16/ThinkComplexity2

def vision_distribution(env):
    """Plot the CDF of the agents' vision distance and block until closed.

    env: Sugarscape
    """
    visions = (agent.vision for agent in env.agents)
    Cdf.from_seq(visions).plot()
    decorate(xlabel='Vision', ylabel='CDF')
    plt.show(block=True)

Exemple #7

0

Afficher le fichier

Fichier : 21_pmf_and_cdf.py Projet : printfCRLF/pp

def make_a_cdf(gss):
    """Print the value of the empirical CDF of age at 30.

    gss: table-like object whose 'age' column exposes `.values`
    """
    # Select the age column
    age = gss['age'].values

    # Compute the CDF of age
    cdf_age = Cdf.from_seq(age)

    # Evaluate the CDF at 30.  Calling the Cdf evaluates it at any numeric
    # quantity, whereas bracket indexing is an exact lookup that raises
    # KeyError when 30 does not occur in the data.
    print(cdf_age(30))

Exemple #8

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : TynClause/empiricaldist

    def testCdfSampling(self):
        """choice() and sample() must give the same draws for seed 17."""
        die = Cdf.from_seq([1, 2, 3, 4, 5, 6])
        expected = [2, 4, 2, 1, 5, 4, 4, 4, 1, 3]

        # choice() draws from NumPy's global random state
        np.random.seed(17)
        drawn = die.choice(10)
        self.assertTrue(np.all(drawn == expected))

        # sample() takes its seed explicitly
        drawn = die.sample(10, replace=True, random_state=17)
        self.assertTrue(np.all(drawn == expected))

Exemple #9

0

Afficher le fichier

def plot_fitnesses(sim):
    """Print the mean fitness and plot the CDF of all fitnesses.

    sim: Simulation object
    """
    fitnesses = sim.get_fitnesses()
    fitness_cdf = Cdf.from_seq(fitnesses)

    mean_fitness = np.mean(fitnesses)
    print('Mean fitness\n', mean_fitness)

    fitness_cdf.plot()
    decorate(xlabel='Fitness', ylabel='CDF')
    plt.show(block=True)

Exemple #10

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : TynClause/empiricaldist

    def testNormalize(self):
        """normalize() returns the pre-normalization total for Pmf and Cdf."""
        data = [0, 1, 2, 3, 3, 4, 4, 4, 5]

        # Pmf: 9 observations in total, two of which are 3
        raw_pmf = Pmf.from_seq(data, normalize=False)
        self.assertAlmostEqual(raw_pmf.normalize(), 9)
        self.assertAlmostEqual(raw_pmf[3], 0.22222222)

        # Cdf: five of the 9 observations are <= 3
        raw_cdf = Cdf.from_seq(data, normalize=False)
        self.assertAlmostEqual(raw_cdf.normalize(), 9)
        self.assertAlmostEqual(raw_cdf(3), 0.55555555)

Exemple #11

0

Afficher le fichier

Fichier : 21_pmf_and_cdf.py Projet : printfCRLF/pp

def plot_a_cdf(gss):
    """Plot the empirical CDF of the 'realinc' column and show the figure.

    gss: table-like object whose 'realinc' column exposes `.values`
    """
    realinc = gss["realinc"].values

    # Build the CDF and draw it in one step
    Cdf.from_seq(realinc).plot()

    # Axis labels
    plt.xlabel('Income (1986 USD)')
    plt.ylabel('CDF')
    plt.show()

Exemple #12

0

Afficher le fichier

Fichier : 21_pmf_and_cdf.py Projet : printfCRLF/pp

def compute_iqr(gss):
    """Print the interquartile range of the 'realinc' column.

    gss: table-like object whose 'realinc' column exposes `.values`
    """
    realinc_cdf = Cdf.from_seq(gss["realinc"].values)

    # Upper and lower quartiles via the inverse CDF
    upper_quartile = realinc_cdf.inverse(0.75)
    lower_quartile = realinc_cdf.inverse(0.25)

    # IQR = 75th percentile minus 25th percentile
    print(upper_quartile - lower_quartile)

Exemple #13

0

Afficher le fichier

Fichier : ch09_sugarscape.py Projet : smithb16/ThinkComplexity2

def wealth_distribution(env, plot=True):
    """Print selected wealth quantiles and optionally plot the sugar CDF.

    env: Sugarscape
    plot: if true, draw the CDF and block until the figure is closed

    returns: Cdf of the agents' sugar
    """
    wealth_cdf = Cdf.from_seq(agent.sugar for agent in env.agents)

    # One line per quantile: the level as a percentage, then the wealth
    # formatted as an integer.
    for level in (0.25, 0.5, 0.75, 0.9):
        heading = 'Wealth of {:.0%}'.format(level)
        print(heading + ': %i' % wealth_cdf.quantile(level))

    if not plot:
        return wealth_cdf

    wealth_cdf.plot()
    decorate(xlabel='Wealth', ylabel='CDF')
    plt.show(block=True)
    return wealth_cdf

Exemple #14

0

Afficher le fichier

def plot_income_cdfs(gss, high, assc, bach):
    """Plot the income CDF of three education groups on shared axes.

    gss: table-like object with a 'realinc' column
    high, assc, bach: selectors used to index the income column for the
        high-school, associate and bachelor groups respectively
    """
    realinc = gss['realinc']

    # One labelled curve per group, drawn in this order
    groups = (
        (high, 'High school'),
        (assc, 'Associate'),
        (bach, 'Bachelor'),
    )
    for selector, label in groups:
        Cdf.from_seq(realinc[selector]).plot(label=label)

    # Axis labels and legend
    plt.xlabel('Income (1986 USD)')
    plt.ylabel('CDF')
    plt.legend()
    plt.show()

Exemple #15

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : TynClause/empiricaldist

    def testCdfComparison(self):
        """Check every *_dist comparison against a scalar and against itself."""
        d4 = Cdf.from_seq(range(1, 5))

        # (method name, expected vs. the scalar 2, expected vs. d4 itself)
        cases = (
            ('gt_dist', 0.5, 0.375),
            ('lt_dist', 0.25, 0.375),
            ('ge_dist', 0.75, 0.625),
            ('le_dist', 0.5, 0.625),
            ('eq_dist', 0.25, 0.25),
            ('ne_dist', 0.75, 0.75),
        )
        for method_name, versus_scalar, versus_self in cases:
            compare = getattr(d4, method_name)
            self.assertEqual(compare(2), versus_scalar)
            self.assertEqual(compare(d4), versus_self)

Exemple #16

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : TynClause/empiricaldist

    def testConversionFunctions(self):
        """Conversions between Pmf, Cdf, Surv and Hazard must round-trip."""
        data = [1, 2, 2, 3, 5, 5, 7, 10]

        # Reference distributions, each built directly from the data
        pmf = Pmf.from_seq(data)
        cdf = Cdf.from_seq(data)
        surv = Surv.from_seq(data)
        haz = Hazard.from_seq(data)

        # Conversions starting from the Pmf
        self.almost_equal_dist(cdf, pmf.make_cdf())
        self.almost_equal_dist(surv, pmf.make_surv())
        derived_haz = pmf.make_hazard()
        self.almost_equal_dist(haz, derived_haz)

        # Conversions starting from the Hazard derived above
        self.almost_equal_dist(surv, derived_haz.make_surv())
        self.almost_equal_dist(cdf, derived_haz.make_cdf())
        self.almost_equal_dist(pmf, derived_haz.make_pmf())

Exemple #17

0

Afficher le fichier

Fichier : lqh_st.py Projet : mcuevasg/Prueba_Canal

    # Build a line chart (x='Fecha', y=hogar_canales) from the data frame
    # that matches `select`.  Any value other than the three named options
    # falls through to `off2`.
    # NOTE(review): `select`, `hogar_canales` and the four frames are defined
    # outside the lines visible here.
    if select == 'Prime':
        fig = px.line(prime, x='Fecha', y=hogar_canales)
    elif select == 'Prime Segunda Franja':
        fig = px.line(prime2, x='Fecha', y=hogar_canales)
    elif select == 'Off Prime PM':
        fig = px.line(offprime, x='Fecha', y=hogar_canales)
    else:
        fig = px.line(off2, x='Fecha', y=hogar_canales)

    # Render the chosen chart in the Streamlit page
    st.plotly_chart(fig)

# fig_bar=px.bar(salida_Franja,x='Franja',y=('SH_C13','mean'))
# st.plotly_chart(fig_bar)
##########################   DISTRIBUTION FUNCTIONS   ######################################################################
# Empirical CDFs of the 'SH_C13' column for the prime and off-prime frames.
# NOTE(review): `cdf_o` is not used in the lines visible here.
cdf_p = Cdf.from_seq(prime['SH_C13'])
cdf_o = Cdf.from_seq(offprime['SH_C13'])

# Quantities (index) and cumulative probabilities (values) of the prime CDF.
# NOTE(review): `y` is not used in the lines visible here.
x = np.array(cdf_p.index)
y = cdf_p.values

# Integer bounds for the sliders: smallest and largest quantity, rounded.
min_x = int(np.around(x.min()))
max_x = int(np.around(x.max()))

# Two sidebar sliders over the same [min_x, max_x] range.
# NOTE(review): nothing visible here enforces share_min <= share_max.
share_min = st.sidebar.slider("Share Hogar Minimo ", min_x, max_x)
share_max = st.sidebar.slider("Share Hogar Maximo ", min_x, max_x)

#probabilidad_1=round((cdf(share_cdf_2))*100,1)
#probabilidad_2=round((1-cdf(share_cdf_2))*100,1)

# Section heading for the probability output
st.markdown("## Probabilidad Share Hogar*")

Exemple #18

0

Afficher le fichier

Fichier : test_empiricaldist.py Projet : TynClause/empiricaldist

 def testPmfFromCdf(self):
     """A Pmf rebuilt from a Cdf must match the Pmf built from the data."""
     data = [1, 2, 2, 3, 5]
     direct_pmf = Pmf.from_seq(data)
     rebuilt_pmf = Cdf.from_seq(data).make_pmf()
     self.almost_equal_dist(direct_pmf, rebuilt_pmf)

Exemple #19

0

Afficher le fichier

Fichier : ch4.py Projet : jaredparmer/ThinkComplexity

##pmf_ba.plot(label='BA model', color='C2', **options)
##plt.xlabel('Degree')
##plt.xscale('log')
##plt.yscale('log')
##plt.legend()
##
##plt.savefig('figs/chap04-3')
##plt.close()

# using Downey's code to make a BA graph and seeing how it works
##print("Constructing BA(20, 3) graph")
##ba_bespoke = barabasi_albert_graph(20, 3)
##nx.draw_circular(ba_bespoke, node_size=700, with_labels=True)
##plt.show()
""" now use cumulative distribution function objects to represent the data """
# Build one named Cdf per graph from the output of degrees(...).
# NOTE(review): `degrees`, `fb`, `ws` and `ba` are defined outside the lines
# visible here; the plotting code that would consume these Cdfs is currently
# commented out.
cdf_fb = Cdf.from_seq(degrees(fb), name='facebook')
cdf_ws = Cdf.from_seq(degrees(ws), name='WS model')
cdf_ba = Cdf.from_seq(degrees(ba), name='BA model')

# now plot the models on log-x scale to compare with the fb data
##plt.figure(figsize=(8,4.5))
##plt.subplot(1,2,1)
##cdf_fb.plot(color='C0')
##cdf_ws.plot(color='C1')
##plt.xlabel('Degree')
##plt.xscale('log')
##plt.ylabel('CDF')
##plt.legend()
##
##plt.subplot(1,2,2)
##cdf_fb.plot(color='C0')

Exemple #20

0

Afficher le fichier

    """
    ## Read data from Facebook file
    dirname = '/Users/bensmith/Documents/ThinkSeries/ThinkComplexity2/data/'
    fin = dirname + 'facebook_combined.txt.gz'
    fb = read_graph(fin)

    n, m, k, pmf_fb = analyze_graph(fb, verbose=True)
    print('pmf_fb:\n',type(pmf_fb))

    ## Build ws & ba models that closely represent Facebook data
    ws = nx.watts_strogatz_graph(n, k, 0.05, seed=15)
    ba = nx.barabasi_albert_graph(n, k, seed=15)
    hk = generate_hk_graph(n, k, 1, seed=15)

    ## Generate CDFs of three graphs
    cdf_fb = Cdf.from_seq(degrees(fb), name='Facebook')
    cdf_ws = Cdf.from_seq(degrees(ws), name='Watts-Strogatz')
    cdf_ba = Cdf.from_seq(degrees(ba), name='Barabasi-Albert')
    cdf_hk = Cdf.from_seq(degrees(hk), name='Holme-Kim')

    ## Generate HK graph that mimics Facebook data
    ps = np.logspace(-4, 0, 9)

    for p in ps:
        G = generate_hk_graph(n, k, p)
        print('\np: ',p)
        n, m, k, pmf_hk = analyze_graph(G, verbose=True)

    ## Generate figures comparing degree of facebook to degree of WS & BA models
    plt.figure(figsize=(8,4))

Exemple #21

0

Afficher le fichier

Fichier : Models_Flow.py Projet : Sathishvp7/CodeBook_Regression

# Exploratory overview of `df`, followed by the PMF/CDF of the SalePrice column.
df.dtypes # data type of each column (the result of this bare expression is discarded)
# NOTE(review): pandas' DataFrame.info() prints its report and returns None,
# so info_df is presumably always None - confirm before relying on it.
info_df = df.info() # summary: data types and non-null counts
describe = df.describe()

# Count the number of int, float and object columns in the dataset
count_dtypes = df.dtypes.value_counts()

# Note 1 - Models generally learn better when the inputs are numeric

# Now compute the PMF and CDF of SalePrice
from empiricaldist import Pmf,Cdf
# PMF - probability mass function: probability of each particular value.
# CDF - cumulative distribution function: probability of a value less than
#       or equal to a given quantity.
sp = df.SalePrice
# Tabulate PMF and CDF side by side, indexed by sale price and sorted by it.
# NOTE(review): sort_values(['SalePrice']) presumably resolves 'SalePrice' as
# the index level name inherited from `sp` - confirm.
Pmf_SalePrice = pd.DataFrame(data= {'Probablity_Mass_Function': Pmf.from_seq(sp),
                                    'Cummulative_Mass_Function' : Cdf.from_seq(sp)},
                                    index= sp).sort_values(['SalePrice'])

# Visualization of the CDF
# Note 2
'''CDF helps to understand how may precent of the total data 
is below or above a specified threshold'''
cdf = Cdf.from_seq(sp)
cdf.plot()

# 4. DATA WRANGLING
'''
Inspecting missing values in each variables and trying to impute statistically acceptable values.
Detect outliers and remove those records.
Remove irrelevant records. Ex. Records with negative age etc
'''

Exemple #22

0

Afficher le fichier

def get_cdfs(y_true: pd.Series, y_pred: pd.Series):
    """Build the empirical CDFs of the true and the predicted values.

    Both inputs are flattened with `flatten_values` before the CDFs are
    built.

    y_true: observed values
    y_pred: predicted values

    returns: tuple (y_true_cdf, y_pred_cdf)
    """
    y_true_cdf = CDF.from_seq(flatten_values(y_true))
    y_pred_cdf = CDF.from_seq(flatten_values(y_pred))

    # Return the CDFs themselves.  The previous version built them and then
    # returned the flattened inputs, discarding the work this function is
    # named for.
    return y_true_cdf, y_pred_cdf