Exemple #1
def plot_cdf(T, S):
    """Plot the CDFs of avalanche durations and sizes side by side.

    T: sequence of avalanche durations
    S: sequence of avalanche sizes
    """
    cdfT = Cdf.from_seq(T)
    cdfS = Cdf.from_seq(S)

    fig = plt.figure(figsize=(10, 5))
    # pyplot.show() takes no positional caption, so the caption is attached
    # to the figure itself; the panels show CDFs, hence the wording.
    fig.suptitle('CDF of avalanche duration and size')

    plt.subplot(1, 2, 1)
    cdfT.plot(xlim=(0, 50), xlabel="Avalanche duration", ylabel="CDF")

    plt.subplot(1, 2, 2)
    cdfS.plot(xlim=(0, 50), xlabel="Avalanche size", ylabel="CDF")
    plt.show()

    def testHazard(self):
        """Check Hazard evaluation and the Cdf/Surv -> Hazard conversions."""
        data = [1, 2, 2, 3, 5]
        hazard = Hazard.from_seq(data)

        # () uses forward to interpolate
        scalar_cases = [
            (1, 0.2),
            (2, 0.5),
            (3, 0.5),
            (4, 0),
            (5, 1.0),
            (6, 0),
        ]
        for q, want in scalar_cases:
            self.assertAlmostEqual(hazard(q), want)

        qs = [0, 1, 2, 3, 4, 5, 6]
        want_all = [0, 0.2, 0.5, 0.5, 0, 1, 0]

        def check(h):
            # Evaluate h on the whole grid and compare element by element.
            for got, want in zip(h(qs), want_all):
                self.assertAlmostEqual(got, want)

        check(hazard)
        check(Cdf.from_seq(data).make_hazard())
        check(Surv.from_seq(data).make_hazard())
Exemple #3
def comparing_cdfs(log_income, dist):
    """Overlay a model CDF and the empirical CDF of ``log_income``.

    log_income: sequence of values whose empirical CDF is drawn
        (labelled as log10 of realinc on the x axis)
    dist: model distribution object exposing a ``cdf(xs)`` method
    """
    # Evaluate the model CDF
    xs = np.linspace(2, 5.5)
    ys = dist.cdf(xs)

    # Plot the model CDF
    plt.plot(xs, ys, color='gray')

    # Create and plot the Cdf of log_income
    Cdf.from_seq(log_income).plot()

    # Label the axes
    plt.xlabel('log10 of realinc')
    plt.ylabel('CDF')
    def testCdf(self):
        """Exercise Cdf lookup, evaluation, inversion and sampling."""
        # if the quantities are not numeric, you can use [] but not ()
        letters = Cdf.from_seq(list('allen'))
        for key, want in [('a', 0.2), ('e', 0.4), ('l', 0.8), ('n', 1.0)]:
            self.assertAlmostEqual(letters[key], want)

        cdf = Cdf.from_seq([1, 2, 2, 3, 5])

        # () uses forward to interpolate
        self.assertEqual(cdf(0), 0)
        eval_cases = [(1, 0.2), (2, 0.6), (3, 0.8), (4, 0.8), (5, 1), (6, 1)]
        for q, want in eval_cases:
            self.assertAlmostEqual(cdf(q), want)

        grid_expected = [0, 0, 0.2, 0.6, 0.8, 0.8, 1, 1]
        for got, want in zip(cdf(range(-1, 7)), grid_expected):
            self.assertAlmostEqual(got, want)

        inverse_cases = [
            (0, 1),
            (0.1, 1),
            (0.2, 1),
            (0.3, 2),
            (0.4, 2),
            (0.5, 2),
            (0.6, 2),
            (0.7, 3),
            (0.8, 3),
            (0.9, 5),
            (0.99999, 5),
            (1, 5),
        ]
        for p, want in inverse_cases:
            self.assertEqual(cdf.inverse(p), want)

        probs = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        quantiles = cdf.inverse(probs)
        self.assertTrue((quantiles == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

        # NOTE(review): the expected draw presumably relies on a random seed
        # fixed outside this method -- confirm.
        drawn = cdf.choice(7, replace=True)
        self.assertListEqual(drawn.tolist(), [2, 5, 3, 2, 1, 1, 1])
def metabolism_distribution(env):
    """Make and plot the CDF of metabolism distribution.

    env: Sugarscape

    returns: Cdf of the `metabolism` attribute over `env.agents`
    """
    cdf = Cdf.from_seq(agent.metabolism for agent in env.agents)
    # Draw the curve so the axis labels below describe an actual plot.
    cdf.plot()
    decorate(xlabel='Metabolism', ylabel='CDF')
    return cdf
def vision_distribution(env):
    """Make and plot the CDF of vision distance.

    env: Sugarscape

    returns: Cdf of the `vision` attribute over `env.agents`
    """
    cdf = Cdf.from_seq(agent.vision for agent in env.agents)
    # Draw the curve so the axis labels below describe an actual plot.
    cdf.plot()
    decorate(xlabel='Vision', ylabel='CDF')
    return cdf
Exemple #7
def make_a_cdf(gss):
    """Return the CDF of the `age` column evaluated at 30.

    gss: table-like object where `gss['age'].values` yields the ages
    """
    # Select the age column
    age = gss['age'].values

    # Compute the CDF of age
    cdf_age = Cdf.from_seq(age)

    # Calculate the CDF of 30
    return cdf_age(30)
    def testCdfSampling(self):
        """Check that `choice` and seeded `sample` give the expected draws."""
        cdf = Cdf.from_seq([1, 2, 3, 4, 5, 6])
        expected = [2, 4, 2, 1, 5, 4, 4, 4, 1, 3]

        # NOTE(review): the unseeded `choice` call presumably depends on a
        # global seed set outside this method -- confirm.
        draws = (
            lambda: cdf.choice(10),
            lambda: cdf.sample(10, replace=True, random_state=17),
        )
        for draw in draws:
            self.assertTrue(np.all(draw() == expected))
Exemple #9
def plot_fitnesses(sim):
    """Plot the CDF of fitnesses.

    Also prints the mean of the fitness values.

    sim: Simulation object
    """
    fits = sim.get_fitnesses()
    cdf_fitness = Cdf.from_seq(fits)
    print('Mean fitness\n', np.mean(fits))
    # Draw the curve before labelling the axes.
    cdf_fitness.plot()
    decorate(xlabel='Fitness', ylabel='CDF')
    def testNormalize(self):
        """Check normalize() on an unnormalized Pmf and Cdf of nine values."""
        data = [0, 1, 2, 3, 3, 4, 4, 4, 5]

        pmf = Pmf.from_seq(data, normalize=False)
        self.assertAlmostEqual(pmf.normalize(), 9)
        self.assertAlmostEqual(pmf[3], 0.22222222)

        cdf = Cdf.from_seq(data, normalize=False)
        self.assertAlmostEqual(cdf.normalize(), 9)
        self.assertAlmostEqual(cdf(3), 0.55555555)
Exemple #11
def plot_a_cdf(gss):
    """Plot the CDF of the `realinc` column.

    gss: table-like object where `gss["realinc"].values` yields the incomes
    """
    # Select realinc
    income = gss["realinc"].values

    # Make the CDF
    cdf_income = Cdf.from_seq(income)

    # Plot it
    cdf_income.plot()

    # Label the axes
    plt.xlabel('Income (1986 USD)')
    plt.ylabel('CDF')
Exemple #12
def compute_iqr(gss):
    """Print and return the interquartile range of the `realinc` column.

    gss: table-like object where `gss["realinc"].values` yields the incomes

    returns: 75th percentile minus 25th percentile of the income CDF
    """
    income = gss["realinc"].values
    cdf_income = Cdf.from_seq(income)

    # Calculate the 75th percentile
    percentile_75th = cdf_income.inverse(0.75)

    # Calculate the 25th percentile
    percentile_25th = cdf_income.inverse(0.25)

    # Calculate the interquartile range
    iqr = percentile_75th - percentile_25th

    # Print the interquartile range
    print(iqr)
    return iqr
def wealth_distribution(env, plot=True):
    """Make CDF of sugar distribution.

    Prints the wealth at the 25%, 50%, 75% and 90% quantiles.

    env: Sugarscape
    plot: if true, draw the CDF and label its axes

    returns: Cdf of the `sugar` attribute over `env.agents`
    """
    qs = [0.25, 0.5, 0.75, 0.9]
    cdf = Cdf.from_seq(agent.sugar for agent in env.agents)
    for q in qs:
        print('Wealth of {:.0%}'.format(q), end='')
        print(': %i' %cdf.quantile(q))

    if plot:
        # Draw the curve so the axis labels describe an actual plot.
        cdf.plot()
        decorate(xlabel='Wealth', ylabel='CDF')

    return cdf
Exemple #14
def plot_income_cdfs(gss, high, assc, bach):
    """Plot the income CDF of each education group on shared axes.

    gss: table-like object with a `realinc` column
    high, assc, bach: selectors used to index `gss['realinc']`
        (presumably boolean masks for each education level -- confirm)
    """
    income = gss['realinc']

    # Plot the CDFs
    Cdf.from_seq(income[high]).plot(label='High school')
    Cdf.from_seq(income[assc]).plot(label='Associate')
    Cdf.from_seq(income[bach]).plot(label='Bachelor')

    # Label the axes
    plt.xlabel('Income (1986 USD)')
    plt.ylabel('CDF')
    # Without a legend the per-curve labels above are never shown.
    plt.legend()
    def testCdfComparison(self):
        """Check each *_dist comparison against a scalar and against itself."""
        d4 = Cdf.from_seq(range(1, 5))

        # (method name, expected vs. the scalar 2, expected vs. d4 itself)
        cases = [
            ('gt_dist', 0.5, 0.375),
            ('lt_dist', 0.25, 0.375),
            ('ge_dist', 0.75, 0.625),
            ('le_dist', 0.5, 0.625),
            ('eq_dist', 0.25, 0.25),
            ('ne_dist', 0.75, 0.75),
        ]
        for method_name, vs_scalar, vs_dist in cases:
            compare = getattr(d4, method_name)
            self.assertEqual(compare(2), vs_scalar)
            self.assertEqual(compare(d4), vs_dist)
    def testConversionFunctions(self):
        """Check that Pmf/Hazard conversions match directly-built objects."""
        data = [1, 2, 2, 3, 5, 5, 7, 10]
        pmf = Pmf.from_seq(data)
        cdf = Cdf.from_seq(data)
        surv = Surv.from_seq(data)
        haz = Hazard.from_seq(data)

        # Conversions that start from the Pmf.
        self.almost_equal_dist(cdf, pmf.make_cdf())
        self.almost_equal_dist(surv, pmf.make_surv())
        derived_haz = pmf.make_hazard()
        self.almost_equal_dist(haz, derived_haz)

        # Conversions that start from the Hazard derived above.
        self.almost_equal_dist(surv, derived_haz.make_surv())
        self.almost_equal_dist(cdf, derived_haz.make_cdf())
        self.almost_equal_dist(pmf, derived_haz.make_pmf())
Exemple #17
    # Build the line chart for the selected time slot.
    if select == 'Prime':
        fig = px.line(prime, x='Fecha', y=hogar_canales)
    elif select == 'Prime Segunda Franja':
        fig = px.line(prime2, x='Fecha', y=hogar_canales)
    elif select == 'Off Prime PM':
        fig = px.line(offprime, x='Fecha', y=hogar_canales)
    else:
        # The off2 chart previously sat inside the 'Off Prime PM' branch and
        # overwrote it; it is now the fallback for any remaining selection.
        # NOTE(review): confirm which selection off2 belongs to.
        fig = px.line(off2, x='Fecha', y=hogar_canales)


# fig_bar=px.bar(salida_Franja,x='Franja',y=('SH_C13','mean'))
# st.plotly_chart(fig_bar)
##########################   DISTRIBUTION FUNCTIONS   ######################################################################
# Empirical CDFs of the 'SH_C13' column for the prime and off-prime data.
cdf_p = Cdf.from_seq(prime['SH_C13'])
cdf_o = Cdf.from_seq(offprime['SH_C13'])

# Index and values of the prime CDF (presumably the share quantities and
# their cumulative probabilities -- confirm against the Cdf class).
x = np.array(cdf_p.index)
y = cdf_p.values

# Integer bounds for the sliders, rounded from the observed extremes.
min_x = int(np.around(x.min()))
max_x = int(np.around(x.max()))

# Both sidebar sliders span the same [min_x, max_x] range.
share_min = st.sidebar.slider("Share Hogar Minimo ", min_x, max_x)
share_max = st.sidebar.slider("Share Hogar Maximo ", min_x, max_x)


st.markdown("## Probabilidad Share Hogar*")
 def testPmfFromCdf(self):
     """Check that a Cdf converts back to the Pmf of the same data."""
     data = [1, 2, 2, 3, 5]
     expected = Pmf.from_seq(data)
     round_trip = Cdf.from_seq(data).make_pmf()
     self.almost_equal_dist(expected, round_trip)
Exemple #19
##pmf_ba.plot(label='BA model', color='C2', **options)

# using Downey's code to make a BA graph and seeing how it works
##print("Constructing BA(20, 3) graph")
##ba_bespoke = barabasi_albert_graph(20, 3)
##nx.draw_circular(ba_bespoke, node_size=700, with_labels=True)
# Build one named Cdf per graph (fb, ws, ba) from whatever `degrees`
# returns for it; the names are presumably used as plot labels -- confirm.
""" now use cumulative distribution function objects to represent the data """
cdf_fb = Cdf.from_seq(degrees(fb), name='facebook')
cdf_ws = Cdf.from_seq(degrees(ws), name='WS model')
cdf_ba = Cdf.from_seq(degrees(ba), name='BA model')

# now plot the models on log-x scale to compare with the fb data
Exemple #20
    ## Read data from Facebook file
    dirname = '/Users/bensmith/Documents/ThinkSeries/ThinkComplexity2/data/'
    fin = dirname + 'facebook_combined.txt.gz'
    fb = read_graph(fin)

    # n, m, k describe the Facebook graph and parameterize every model below.
    n, m, k, pmf_fb = analyze_graph(fb, verbose=True)

    ## Build ws & ba models that closely represent Facebook data
    ws = nx.watts_strogatz_graph(n, k, 0.05, seed=15)
    ba = nx.barabasi_albert_graph(n, k, seed=15)
    hk = generate_hk_graph(n, k, 1, seed=15)

    ## Generate CDFs of three graphs
    cdf_fb = Cdf.from_seq(degrees(fb), name='Facebook')
    cdf_ws = Cdf.from_seq(degrees(ws), name='Watts-Strogatz')
    cdf_ba = Cdf.from_seq(degrees(ba), name='Barabasi-Albert')
    cdf_hk = Cdf.from_seq(degrees(hk), name='Holme-Kim')

    ## Generate HK graph that mimics Facebook data
    ps = np.logspace(-4, 0, 9)

    for p in ps:
        G = generate_hk_graph(n, k, p)
        print('\np: ',p)
        # Keep only the PMF: unpacking back into n, m, k would overwrite the
        # Facebook parameters that the next iteration's graph is built from.
        _, _, _, pmf_hk = analyze_graph(G, verbose=True)

    ## Generate figures comparing degree of facebook to degree of WS & BA models
df.dtypes # data type of each column (bare expression; its value is not stored)
info_df = df.info() # dtype / null-count summary; NOTE(review): pandas' info() prints and returns None, so info_df is presumably None -- confirm
describe = df.describe()

# Count the number of columns of each dtype (int, float, object) in the dataset
count_dtypes = df.dtypes.value_counts()

# Note 1 - models generally learn better from numeric inputs

# Now compute the PMF and CDF of SalePrice
from empiricaldist import Pmf,Cdf
# PMF - probability mass function: the probability of each particular value.
# CDF - cumulative distribution function: the probability of a value less than or equal to a given one.
sp = df.SalePrice
# Table with the PMF and CDF as columns, indexed by sale price and sorted by it.
Pmf_SalePrice = pd.DataFrame(data= {'Probablity_Mass_Function': Pmf.from_seq(sp),
                                    'Cummulative_Mass_Function' : Cdf.from_seq(sp)},
                                    index= sp).sort_values(['SalePrice'])

# Visualisation of the CDF
# Note 2
'''CDF helps to understand how may precent of the total data 
is below or above a specified threshold'''
cdf = Cdf.from_seq(sp)

Inspect the missing values in each variable and impute statistically acceptable values.
Detect outliers and remove those records.
Remove irrelevant records, e.g. records with a negative age.
Exemple #22
def get_cdfs(y_true: pd.Series, y_pred: pd.Series):
    """Build the empirical CDFs of the true and predicted values.

    Both inputs are passed through ``flatten_values`` before the CDFs are
    built with ``CDF.from_seq``.

    Args:
        y_true: observed values.
        y_pred: predicted values.

    Returns:
        Tuple ``(y_true_cdf, y_pred_cdf)``.
    """
    y_true_cdf = CDF.from_seq(flatten_values(y_true))
    y_pred_cdf = CDF.from_seq(flatten_values(y_pred))
    # Return the CDFs themselves; the flattened inputs were returned before,
    # which discarded the work this function is named for.
    return y_true_cdf, y_pred_cdf