Beispiel #1
0
    # Tabulate the negative binomial pmf for every total event count in xs.
    # x, r, p, space and edgecolor are assigned outside this excerpt.
    # NOTE(review): the lower bound 4 is hard-coded here, while the cumulative
    # section below starts its range at r -- confirm that r == 4 at this point.
    xs = range(4, x + 1)
    probs = list()
    # The loop rebinds x; it is reassigned explicitly further down.
    for x in xs:
        # scipy's nbinom is defined over the number of failures before the
        # r-th success, so the event number x is passed as x - r.
        prob = stats.nbinom.pmf(x - r, r, p)
        probs.append(round(prob, 8))
        print('{0}The probability that E occurs exactly {1} times by the {2}th event is {3:.8}'\
            .format(space, r, x, prob))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    # Convert to arrays; probabilities are rounded to 4 decimals for the plot.
    xs = np.array(xs)
    probs = np.array(probs).round(4)
    # pplot and rplot are not assigned in this excerpt; presumably they hold
    # the p and r values to show in the title -- verify against the caller.
    plots.barplot(xs, probs
        ,title = 'Negative Binomial Distribution; p = {0:.8}, r = {1}'\
            .format(pplot, rplot)
        ,align = 'edge'
        ,edgecolor = edgecolor
        ,show = False, close = False)

    # ----------------------------------------
    # cumulative probabilities
    # ----------------------------------------
    # Fresh parameters for the cumulative example.
    x = 5
    r = 3
    p = 0.2
    print('')
    print('{0}An event E occurs with a probability of {1}.'.format(space, p))
    # Accumulate the pmf over event counts r..x (inclusive).
    prob = 0
    xs = range(r, x + 1)
    # range() was built from the old x before the loop variable rebinds it.
    for x in xs:
        prob = prob + stats.nbinom.pmf(x - r, r, p)
Beispiel #2
0
        # Compare Twitter-derived ratings with Netflix ratings for a random
        # set of movies (Python 2 code: note the print statements below).
        # Calculate rating means of random movies
        (means, stddevs, processed_titles) = means_of_random_movies(args.n, twitter_search_type=args.t,
                                                                            top_movies_only=args.top_movies_only,
                                                                            tweet_threshold=args.threshold)
        
        # Unpack scores
        tw_means, tw_stddevs = means, stddevs
    
        # Get netflix ratings
        # presumably nf_title_score returns a (mean, stddev) pair per title;
        # zip(*...) then splits the pairs into two parallel tuples.
        netflix_scores = map(nf_title_score, processed_titles)
        nf_means, nf_stddevs = zip(*netflix_scores)
    
        # Computed before any normalization is applied to tw_means.
        correlation_coefficient = correlation(tw_means, nf_means)
    
        if DEBUG:
            print "Twitter stddev: %.2f" % stddev(tw_means)
            print "Netflix stddev: %.2f" % stddev(nf_means)
            print "Correlation: %.2f" % correlation_coefficient
    
        if args.normalize:
            # Shift every Twitter mean by the gap between the two averages.
            mean_diff = average(nf_means) - average(tw_means)
            print "Adjusting twitter means by %.2f (nf avg mean: %.2f; tw avg mean: %.2f)" % (mean_diff, average(nf_means), average(tw_means))
            tw_means = map(lambda mean: mean + mean_diff, tw_means)
    
        if args.plot:
            data    = (tw_means, nf_means)
            # Error bars are optional and only passed when requested.
            errors = (tw_stddevs, nf_stddevs) if args.show_errors else None
            xlabels = map(initials, processed_titles)
            # Here plt is whatever barplot returns; it is only used for show().
            plt = barplot(data, errors=errors, xlabels=xlabels)
            plt.show()
Beispiel #3
0
    # Evaluate the normal pdf on a grid from mu - 5 up to (excluding) mu + 6.
    # mu, sigma, pdfvals, space and edgecolor are assigned outside this excerpt.
    # NOTE(review): the grid step is mu / 10, so it scales with mu and is
    # zero when mu == 0 -- confirm mu is always a non-zero value here.
    xs = np.arange(mu - 5, mu + 5 + 1, (mu / 10))
    for xl in xs:
        pdfval = stats.norm.pdf(xl, mu, sigma)
        pdfvals.append(pdfval)
        print('{0}The value of the probability density function at x = {1} is {2:.8}.'\
            .format(space, xl, pdfval))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    # Density values are rounded to 4 decimals for the plot.
    xs = np.array(xs)
    pdfvals = np.array(pdfvals).round(4)
    plots.barplot(xs,
                  pdfvals,
                  title='Normal Distribution; mu = {0:.8}, sigma = {1}'.format(
                      mu, sigma),
                  align='edge',
                  edgecolor=edgecolor,
                  show=False,
                  close=False)
    print('')

    # ----------------------------------------
    # cumulative probabilities
    # ----------------------------------------
    # Compare scipy's cdf at x with a numerical sum of the pdf.
    x = 7
    h = 1e-6
    probcum = stats.norm.cdf(x, mu, sigma)
    # NOTE(review): the grid starts at int(-6 * sigma), which does not take mu
    # into account -- presumably mu is close to 0 here; confirm. With
    # h = 1e-6 the grid has a million points per unit of width.
    xs = np.arange((int)(-6 * sigma), x, h)
    probs = stats.norm.pdf(xs, mu, sigma)
    # Rectangle rule: the sum of pdf(x_i) * h approximates the integral up to x.
    probcum2 = probs * h
    probcum2 = probcum2.sum()
Beispiel #4
0
        .format(space, x, pdfval))
    # Tabulate the exponential pdf on a regular grid from xstart up to (not
    # including) xend. lamb, x and edgecolor are assigned outside this excerpt.
    # NOTE(review): this empty list is overwritten below before it is read.
    pdfvals = list()
    xstart = (float)(0.02)
    xend = (float)(0.7)
    h = (float)(0.02)
    xs = np.arange(xstart, xend, h)
    # Positional arguments are loc = 0 and scale = 1 / lamb.
    pdfvals = stats.expon.pdf(xs, 0, 1 / lamb).round(8)

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    xs = np.array(xs).round(4)
    pdfvals = np.array(pdfvals).round(2)
    # Bars are as wide as the grid step h.
    fig, ax1 = plots.barplot(xs, pdfvals
        ,title = 'Exponential Distribution; lambda = {0:.8}'.format(lamb)
        ,align = 'edge'
        ,edgecolor = edgecolor
        ,width = h
        ,show = False, close = False)
    # NOTE(review): a twin axis is created here, but the scatter call below is
    # given ax1 and ax2 is then rebound to whatever plots.scatter returns --
    # confirm whether ax = ax2 was intended.
    ax2 = ax1.twinx()
    fig, ax2 = plots.scatter(xs, pdfvals, fig = fig, ax = ax1
        ,ylim = ax1.get_ylim(), markersize = 0, linewidth = 2)
    ax2.set_title('')

    # ----------------------------------------
    # sample calculations 1
    # ----------------------------------------
    # Cross-check scipy's cdf against the closed form 1 - exp(-lamb * x).
    prob1 = stats.expon.cdf(x, 0, 1 / lamb)
    prob1calc = 1 - math.exp(-x * lamb)
    # Complement of the cdf: probability of exceeding x.
    prob2 = 1 - prob1
    # 0.082 is the expected result for the x and lamb set outside this excerpt.
    assert(0.082 - round(prob2, 3) == 0)
    assert(round(prob1 - prob1calc, 8) == 0)
Beispiel #5
0
    # Tabulate the binomial pmf for 0..n occurrences in n events.
    # n, p, probs, space and edgecolor are assigned outside this excerpt.
    xs = range(0, n + 1)
    for x in xs:
        prob = stats.binom.pmf(x, n, p)
        # Stored values are rounded; the printed value uses the unrounded prob.
        probs.append(round(prob, 8))
        print('{0}The probability that E occurs {1} times'.format(space, x) +\
            ' in the next {0} events is {1:.8}.'.format(n, prob))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    xs = np.array(xs)
    probs = np.array(probs).round(4)
    plots.barplot(xs,
                  probs,
                  title='Binomial Distribution; p = {0:.8}, n = {1}'.format(
                      p, n),
                  align='edge',
                  edgecolor=edgecolor,
                  show=False,
                  close=False)
    print('')

    # ----------------------------------------
    # cumulative probabilities
    # ----------------------------------------
    # Probability of at least x occurrences, computed two ways.
    x = 4
    prob = 0
    # Via the cdf: 1 - P(X <= x - 1).
    probcum = stats.binom.cdf(x - 1, n, p)
    probcum = 1 - probcum
    # Via the pmf: 1 - sum of pmf(k) for k = 0..x-1.
    # NOTE(review): the loop variable reuses the name x, so after the loop x
    # holds the last value iterated (3), not the threshold 4 -- check any
    # later use of x.
    for x in range(0, x):
        prob = prob + stats.binom.pmf(x, n, p)
    prob = 1 - prob
Beispiel #6
0
    # Tabulate the Poisson pmf with mean lamb * T for counts 0..x+4.
    # x, lamb, T, probs, space and edgecolor are assigned outside this excerpt.
    xs = range(0, x + 5)
    for xl in xs:
        prob = stats.poisson.pmf(xl, lamb * T)
        probs.append(prob)
        print('{0}The probability that E occurs {1} times'.format(space, xl) +\
            ' in T = {0} units is {1:.8}'.format(T, prob))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    # Probabilities are rounded to 4 decimals for the plot.
    xs = np.array(xs)
    probs = np.array(probs).round(4)
    plots.barplot(xs,
                  probs,
                  title='Poisson Distribution; lamb = {0}, T = {1}'.format(
                      lamb, T),
                  align='edge',
                  edgecolor=edgecolor,
                  show=False,
                  close=False)
    print('')

    # ----------------------------------------
    # cumulative probabilities
    # ----------------------------------------
    # New threshold and interval length for the cumulative example.
    x = 1
    T = 2
    # 1 - P(X <= x) + P(X == x) equals P(X >= x).
    probcum = stats.poisson.cdf(x, lamb * T)
    probcum = 1 - probcum + stats.poisson.pmf(x, lamb * T)
    # Sum the pmf over counts 0..x-1, i.e. P(X < x).
    prob = 0
    for xl in range(0, x):
        prob = prob + stats.poisson.pmf(xl, lamb * T)
Beispiel #7
0
    # Tabulate the geometric pmf for trial numbers 0..x.
    # x, p, probs, space and edgecolor are assigned outside this excerpt.
    xs = range(x + 1)
    # The loop rebinds x; it is reassigned explicitly further down.
    for x in xs:
        prob = stats.geom.pmf(x, p)
        probs.append(round(prob, 8))
        print('{0}The probability that E first occurs in {1} times is {2:.8}'\
            .format(space, x, prob))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    xs = np.array(xs)
    probs = np.array(probs).round(4)
    plots.barplot(xs,
                  probs,
                  title='Geometric Distribution; p = {0:.8}'.format(p),
                  align='edge',
                  edgecolor=edgecolor,
                  show=False,
                  close=False)
    print('')

    # ----------------------------------------
    # cumulative probabilities
    # ----------------------------------------
    # Complement of the pmf summed over trial numbers 0..x-1.
    x = 4
    prob = 0
    # NOTE(review): the loop variable reuses the name x, so after the loop x
    # holds the last value iterated (3), not the threshold 4 -- check the
    # message printed after this block.
    for x in range(0, x):
        prob = prob + stats.geom.pmf(x, p)
    prob = 1 - prob
    # 0.729 is the expected result for the p set outside this excerpt.
    assert (0.729 == round(prob, 3))
    print('{0}The probability that E first occurs in >= {1} events is {2:.8}'.
Beispiel #8
0
    # Tabulate the hypergeometric pmf for 0..n objects of interest in a sample.
    # N, K, n, space and edgecolor are assigned outside this excerpt.
    probs = list()
    xs = range(0, n + 1)
    for x in xs:
        # scipy's argument order is (k, M, n, N): population size, number of
        # objects of interest, sample size -- here N, K and n respectively.
        prob = stats.hypergeom.pmf(x, N, K, n)
        probs.append(round(prob, 8))
        print('{0}The probability that the sample contains x = {1} objects of interest is {2:.8}.'\
            .format(space, x, prob))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    # Probabilities are rounded to 4 decimals for the plot.
    xs = np.array(xs)
    probs = np.array(probs).round(4)
    plots.barplot(xs, probs
        ,title = 'Hypergeometric Distribution; N = {0}, K = {1}, n = {2}'.format(N, K, n)
        ,align = 'edge'
        ,edgecolor = edgecolor
        ,show = False, close = False)
    print('')

    # ----------------------------------------
    # cumulative probabilities
    # ----------------------------------------
    # Fresh parameters for the cumulative example.
    N = 300
    K = 100
    n = 4
    x = 2
    # 1 - P(X <= x) + P(X == x) equals P(X >= x).
    probcum = stats.hypergeom.cdf(x, N, K, n)
    probcum = 1 - probcum + stats.hypergeom.pmf(x, N, K, n)
    # Counts 0..x-1, prepared for the code that follows this excerpt.
    prob = 0
    xs = range(0, x)
Beispiel #9
0
            twitter_search_type=args.t,
            top_movies_only=args.top_movies_only,
            tweet_threshold=args.threshold)

        # Compare Twitter-derived ratings with Netflix ratings (Python 2 code:
        # note the print statements below). means, stddevs and
        # processed_titles come from the call that ends just above this excerpt.
        # Unpack scores
        tw_means, tw_stddevs = means, stddevs

        # Get netflix ratings
        # presumably nf_title_score returns a (mean, stddev) pair per title;
        # zip(*...) then splits the pairs into two parallel tuples.
        netflix_scores = map(nf_title_score, processed_titles)
        nf_means, nf_stddevs = zip(*netflix_scores)

        # Computed before any normalization is applied to tw_means.
        correlation_coefficient = correlation(tw_means, nf_means)

        if DEBUG:
            print "Twitter stddev: %.2f" % stddev(tw_means)
            print "Netflix stddev: %.2f" % stddev(nf_means)
            print "Correlation: %.2f" % correlation_coefficient

        if args.normalize:
            # Shift every Twitter mean by the gap between the two averages.
            mean_diff = average(nf_means) - average(tw_means)
            print "Adjusting twitter means by %.2f (nf avg mean: %.2f; tw avg mean: %.2f)" % (
                mean_diff, average(nf_means), average(tw_means))
            tw_means = map(lambda mean: mean + mean_diff, tw_means)

        if args.plot:
            data = (tw_means, nf_means)
            # Error bars are optional and only passed when requested.
            errors = (tw_stddevs, nf_stddevs) if args.show_errors else None
            xlabels = map(initials, processed_titles)
            # Here plt is whatever barplot returns; it is only used for show().
            plt = barplot(data, errors=errors, xlabels=xlabels)
            plt.show()
Beispiel #10
0
    # Overlay a normal curve on the binomial pmf for n trials.
    # p and edgecolor are assigned outside this excerpt.
    n = 10      # in this many trials
    x1 = 0
    x2 = 10
    # NOTE(review): this empty list is overwritten two lines below before it
    # is read.
    probs = list()
    # Integer support x1..x2 for the binomial pmf (vectorized call).
    xs1 = np.array(range(x1, x2 + 1))
    probs = stats.binom.pmf(xs1, n, p)
    # Finer grid (step 0.1) for the continuous curve.
    xs2 = np.arange(x1, x2, 1e-1)
    # Standard normal pdf at z = (x - n*p) / sqrt(n*p*(1-p)). The result is
    # not divided by the standard deviation, so its scale differs from the
    # pmf; it is drawn on the separate twin axis below.
    proba = stats.norm.pdf((xs2 - (n * p))/(math.sqrt((n * p) * (1 - p))))

    # ----------------------------------------
    # plotting
    # ----------------------------------------
    probs = np.array(probs).round(4)
    fig, ax1 = plots.barplot(xs1, probs
        ,title = 'Normal Approximation of Binomial Distribution; p = {0:.8}, n = {1}'.format(p, n)
        ,align = 'edge'
        ,edgecolor = edgecolor
        ,show = False, close = False)
    ax2 = ax1.twinx()
    # The curve is shifted right by 0.5, presumably to line it up with the
    # centers of the edge-aligned bars -- confirm the bar width used by
    # plots.barplot.
    fig, ax2 = plots.scatter(xs2 + 0.5, proba, fig = fig, ax = ax2, markersize = 0, linewidth = 2)
    ax2.set_title('')
    print('')

    # ----------------------------------------
    # sample calculations
    # ----------------------------------------
    p = 1e-5        # probability of E occurring
    # NOTE(review): 16e6 is a float literal, so n is a float from here on.
    n = 16e6        # in this many trials
    x = 150         # contains this many occurrences of E
    h = 1           # step size
    # NOTE(review): the step passed to arange is the literal 1, not h.
    xs1 = np.arange(0, x + h, 1)