# Evaluate the negative binomial pmf at x - r for every x from 4 up to the
# current value of x (inclusive); each probability is printed and collected.
# NOTE(review): the lower bound is the literal 4, whereas the cumulative
# section below starts its range at r -- confirm r == 4 at this point.
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source; confirm the print belongs inside the loop.
xs = range(4, x + 1)
probs = []
for x in xs:
    prob = stats.nbinom.pmf(x - r, r, p)
    probs.append(round(prob, 8))
    print(
        '{0}The probability that E occurs exactly {1} times by the {2}th event is {3:.8}'
        .format(space, r, x, prob)
    )

# ----------------------------------------
# plotting
# ----------------------------------------
# The title reports pplot / rplot (defined outside this section), not p / r.
xs = np.array(xs)
probs = np.array(probs).round(4)
plots.barplot(
    xs,
    probs,
    title='Negative Binomial Distribution; p = {0:.8}, r = {1}'.format(pplot, rplot),
    align='edge',
    edgecolor=edgecolor,
    show=False,
    close=False,
)

# ----------------------------------------
# cumulative probabilities
# ----------------------------------------
# Sum the pmf over x = r .. 5 with fresh parameters.
x = 5
r = 3
p = 0.2
print('')
print('{0}An event E occurs with a probability of {1}.'.format(space, p))
prob = 0
xs = range(r, x + 1)
for x in xs:
    prob += stats.nbinom.pmf(x - r, r, p)
# Compare Twitter-derived movie rating means against Netflix ratings:
# fetch both, report their correlation, optionally shift the Twitter means
# onto the Netflix average, and optionally plot them side by side.
#
# Written so it runs unchanged on Python 2 and Python 3: print is called with
# a single parenthesised argument, and list comprehensions replace map() so
# the results are real lists on both versions (they are iterated repeatedly).
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source; confirm which statements belong to each `if` branch, in particular
# whether the "Correlation" print is meant to be DEBUG-only.

# Calculate rating means of random movies
(means, stddevs, processed_titles) = means_of_random_movies(
    args.n,
    twitter_search_type=args.t,
    top_movies_only=args.top_movies_only,
    tweet_threshold=args.threshold)

# Unpack scores
tw_means, tw_stddevs = means, stddevs

# Get netflix ratings
netflix_scores = [nf_title_score(title) for title in processed_titles]
nf_means, nf_stddevs = zip(*netflix_scores)

correlation_coefficient = correlation(tw_means, nf_means)

if DEBUG:
    print("Twitter stddev: %.2f" % stddev(tw_means))
    print("Netflix stddev: %.2f" % stddev(nf_means))
    print("Correlation: %.2f" % correlation_coefficient)

if args.normalize:
    # Compute each average once instead of twice.
    nf_avg = average(nf_means)
    tw_avg = average(tw_means)
    mean_diff = nf_avg - tw_avg
    print("Adjusting twitter means by %.2f (nf avg mean: %.2f; tw avg mean: %.2f)"
          % (mean_diff, nf_avg, tw_avg))
    tw_means = [mean + mean_diff for mean in tw_means]

if args.plot:
    data = (tw_means, nf_means)
    errors = (tw_stddevs, nf_stddevs) if args.show_errors else None
    xlabels = [initials(title) for title in processed_titles]
    plt = barplot(data, errors=errors, xlabels=xlabels)
    plt.show()
# Evaluate the normal pdf on a grid from mu - 5 up to (but excluding) mu + 6,
# printing each value and appending it to pdfvals (created outside this view).
# NOTE(review): the grid step is mu / 10, which presumably requires mu > 0 --
# confirm against the value of mu set earlier.
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source; confirm the print belongs inside the loop.
xs = np.arange(mu - 5, mu + 5 + 1, mu / 10)
for xl in xs:
    pdfval = stats.norm.pdf(xl, mu, sigma)
    pdfvals.append(pdfval)
    print(
        '{0}The value of the probability density function at x = {1} is {2:.8}.'
        .format(space, xl, pdfval)
    )

# ----------------------------------------
# plotting
# ----------------------------------------
xs = np.array(xs)
pdfvals = np.array(pdfvals).round(4)
plots.barplot(
    xs,
    pdfvals,
    title='Normal Distribution; mu = {0:.8}, sigma = {1}'.format(mu, sigma),
    align='edge',
    edgecolor=edgecolor,
    show=False,
    close=False,
)
print('')

# ----------------------------------------
# cumulative probabilities
# ----------------------------------------
# probcum comes from the closed-form cdf; probcum2 approximates the same
# quantity by summing pdf * h over a grid of spacing h that ends just below x.
# NOTE(review): the grid starts at int(-6 * sigma) regardless of mu --
# presumably mu is close to 0 here; confirm.
x = 7
h = 1e-6
probcum = stats.norm.cdf(x, mu, sigma)
xs = np.arange(int(-6 * sigma), x, h)
probs = stats.norm.pdf(xs, mu, sigma)
probcum2 = (probs * h).sum()
.format(space, x, pdfval)) pdfvals = list() xstart = (float)(0.02) xend = (float)(0.7) h = (float)(0.02) xs = np.arange(xstart, xend, h) pdfvals = stats.expon.pdf(xs, 0, 1 / lamb).round(8) # ---------------------------------------- # plotting # ---------------------------------------- xs = np.array(xs).round(4) pdfvals = np.array(pdfvals).round(2) fig, ax1 = plots.barplot(xs, pdfvals ,title = 'Exponential Distribution; lambda = {0:.8}'.format(lamb) ,align = 'edge' ,edgecolor = edgecolor ,width = h ,show = False, close = False) ax2 = ax1.twinx() fig, ax2 = plots.scatter(xs, pdfvals, fig = fig, ax = ax1 ,ylim = ax1.get_ylim(), markersize = 0, linewidth = 2) ax2.set_title('') # ---------------------------------------- # sample calculations 1 # ---------------------------------------- prob1 = stats.expon.cdf(x, 0, 1 / lamb) prob1calc = 1 - math.exp(-x * lamb) prob2 = 1 - prob1 assert(0.082 - round(prob2, 3) == 0) assert(round(prob1 - prob1calc, 8) == 0)
# Tabulate the binomial pmf for 0 .. n occurrences, printing each value and
# appending it to probs (created outside this view).
# The loop target is named xl rather than x so that the loops never overwrite
# the threshold x used by the cumulative section.
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source; confirm the loop bodies.
xs = range(0, n + 1)
for xl in xs:
    prob = stats.binom.pmf(xl, n, p)
    probs.append(round(prob, 8))
    print('{0}The probability that E occurs {1} times'.format(space, xl) +
          ' in the next {0} events is {1:.8}.'.format(n, prob))

# ----------------------------------------
# plotting
# ----------------------------------------
xs = np.array(xs)
probs = np.array(probs).round(4)
plots.barplot(xs, probs,
              title='Binomial Distribution; p = {0:.8}, n = {1}'.format(p, n),
              align='edge', edgecolor=edgecolor, show=False, close=False)
print('')

# ----------------------------------------
# cumulative probabilities
# ----------------------------------------
# P(X >= x) computed two ways: from the cdf (probcum) and by summing the pmf
# over 0 .. x - 1 and taking the complement (prob).
x = 4
prob = 0
probcum = stats.binom.cdf(x - 1, n, p)
probcum = 1 - probcum
# Previously this loop iterated with `x` itself, leaving x == 3 afterwards;
# a separate loop variable keeps x at the threshold for any code that follows.
for xl in range(0, x):
    prob = prob + stats.binom.pmf(xl, n, p)
prob = 1 - prob
# Tabulate the Poisson pmf with mean lamb * T for 0 .. x + 4 occurrences,
# printing each value and appending it to probs (created outside this view).
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source; confirm the print belongs inside the loop.
xs = range(x + 5)
for xl in xs:
    prob = stats.poisson.pmf(xl, lamb * T)
    probs.append(prob)
    print(
        '{0}The probability that E occurs {1} times'.format(space, xl)
        + ' in T = {0} units is {1:.8}'.format(T, prob)
    )

# ----------------------------------------
# plotting
# ----------------------------------------
xs = np.array(xs)
probs = np.array(probs).round(4)
plots.barplot(
    xs,
    probs,
    title='Poisson Distribution; lamb = {0}, T = {1}'.format(lamb, T),
    align='edge',
    edgecolor=edgecolor,
    show=False,
    close=False,
)
print('')

# ----------------------------------------
# cumulative probabilities
# ----------------------------------------
# probcum is 1 - cdf(x) + pmf(x), i.e. the complement of the cdf with the
# mass at x itself added back; prob accumulates the pmf over 0 .. x - 1.
x = 1
T = 2
probcum = 1 - stats.poisson.cdf(x, lamb * T) + stats.poisson.pmf(x, lamb * T)
prob = 0
for xl in range(x):
    prob += stats.poisson.pmf(xl, lamb * T)
xs = range(x + 1) for x in xs: prob = stats.geom.pmf(x, p) probs.append(round(prob, 8)) print('{0}The probability that E first occurs in {1} times is {2:.8}'\ .format(space, x, prob)) # ---------------------------------------- # plotting # ---------------------------------------- xs = np.array(xs) probs = np.array(probs).round(4) plots.barplot(xs, probs, title='Geometric Distribution; p = {0:.8}'.format(p), align='edge', edgecolor=edgecolor, show=False, close=False) print('') # ---------------------------------------- # cumulative probabilities # ---------------------------------------- x = 4 prob = 0 for x in range(0, x): prob = prob + stats.geom.pmf(x, p) prob = 1 - prob assert (0.729 == round(prob, 3)) print('{0}The probability that E first occurs in >= {1} events is {2:.8}'.
# Tabulate the hypergeometric pmf, called as pmf(x, N, K, n), for every
# x in 0 .. n; each probability is printed and collected.
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source; confirm the print belongs inside the loop.
probs = []
xs = range(n + 1)
for x in xs:
    prob = stats.hypergeom.pmf(x, N, K, n)
    probs.append(round(prob, 8))
    print(
        '{0}The probability that the sample contains x = {1} objects of interest is {2:.8}.'
        .format(space, x, prob)
    )

# ----------------------------------------
# plotting
# ----------------------------------------
xs = np.array(xs)
probs = np.array(probs).round(4)
plots.barplot(
    xs,
    probs,
    title='Hypergeometric Distribution; N = {0}, K = {1}, n = {2}'.format(N, K, n),
    align='edge',
    edgecolor=edgecolor,
    show=False,
    close=False,
)
print('')

# ----------------------------------------
# cumulative probabilities
# ----------------------------------------
# probcum is 1 - cdf(x) + pmf(x), i.e. the complement of the cdf with the
# mass at x itself added back.
N = 300
K = 100
n = 4
x = 2
probcum = 1 - stats.hypergeom.cdf(x, N, K, n) + stats.hypergeom.pmf(x, N, K, n)
prob = 0
xs = range(x)
twitter_search_type=args.t, top_movies_only=args.top_movies_only, tweet_threshold=args.threshold) # Unpack scores tw_means, tw_stddevs = means, stddevs # Get netflix ratings netflix_scores = map(nf_title_score, processed_titles) nf_means, nf_stddevs = zip(*netflix_scores) correlation_coefficient = correlation(tw_means, nf_means) if DEBUG: print "Twitter stddev: %.2f" % stddev(tw_means) print "Netflix stddev: %.2f" % stddev(nf_means) print "Correlation: %.2f" % correlation_coefficient if args.normalize: mean_diff = average(nf_means) - average(tw_means) print "Adjusting twitter means by %.2f (nf avg mean: %.2f; tw avg mean: %.2f)" % ( mean_diff, average(nf_means), average(tw_means)) tw_means = map(lambda mean: mean + mean_diff, tw_means) if args.plot: data = (tw_means, nf_means) errors = (tw_stddevs, nf_stddevs) if args.show_errors else None xlabels = map(initials, processed_titles) plt = barplot(data, errors=errors, xlabels=xlabels) plt.show()
# Overlay the normal approximation on the binomial pmf for n trials with
# success probability p (set outside this view).
n = 10  # in this many trials
x1 = 0
x2 = 10
xs1 = np.arange(x1, x2 + 1)
probs = stats.binom.pmf(xs1, n, p)
xs2 = np.arange(x1, x2, 1e-1)
# Density of N(n*p, n*p*(1-p)) evaluated on xs2. The previous code evaluated
# the standard normal pdf at the z-score without dividing by the standard
# deviation, which gives the same curve shape but not the density of the
# approximating distribution.
proba = stats.norm.pdf(xs2, n * p, math.sqrt((n * p) * (1 - p)))

# ----------------------------------------
# plotting
# ----------------------------------------
probs = np.array(probs).round(4)
fig, ax1 = plots.barplot(
    xs1,
    probs,
    title='Normal Approximation of Binomial Distribution; p = {0:.8}, n = {1}'.format(p, n),
    align='edge',
    edgecolor=edgecolor,
    show=False,
    close=False,
)
ax2 = ax1.twinx()
# The curve is shifted by 0.5 relative to xs2; the bars above are drawn with
# align='edge'.
fig, ax2 = plots.scatter(xs2 + 0.5, proba, fig=fig, ax=ax2,
                         markersize=0, linewidth=2)
ax2.set_title('')
print('')

# ----------------------------------------
# sample calculations
# ----------------------------------------
p = 1e-5  # probability of E occurring
n = 16e6  # in this many trials
x = 150  # contains this many occurrences of E
h = 1  # step size
# Use the declared step size h for the grid (same values while h == 1).
xs1 = np.arange(0, x + h, h)