def __init__(self, dist_dir):
    """Create an instance with empty working state.

    dist_dir: value stored as ``self.dist_dir`` (presumably the directory
        result files are written to -- confirm against callers).
    """
    Pmf.__init__(self)

    # Configuration supplied by the caller.
    self.dist_dir = dist_dir
    self.dist_file = ''

    # Working state; all of it starts out empty.
    self.pmf = None
    self.json_data = {}
    self.infs_data = {}
def __init__(self, hypos):
    """Build a uniform prior over the given bowls.

    hypos: sequence of string bowl IDs
    """
    Pmf.__init__(self)

    # Equal unnormalized weight per bowl; Normalize() rescales the total to 1.
    for bowl_id in hypos:
        self.Set(bowl_id, 1)

    self.Normalize()
def __init__(self, hypos):
    """Set up a uniform distribution over the hypotheses.

    hypos: sequence of hypotheses
    """
    Pmf.__init__(self)
    equal_weight = 1
    for hypothesis in hypos:
        self.Set(hypothesis, equal_weight)
    self.Normalize()
def __init__(self, hypos, alpha=1.0):
    """Initialize the distribution with a power-law prior.

    Per the original author's note, the parent class uses a uniform
    distribution for the prior; this constructor replaces it with a
    power law, giving each hypothesis the unnormalized weight
    ``hypo ** (-alpha)``.

    hypos: sequence of numeric hypotheses
    alpha: exponent of the power law
    """
    Pmf.__init__(self)
    for candidate in hypos:
        self.Set(candidate, pow(candidate, -alpha))
    self.Normalize()
def __init__(self, bowl1, bowl2):
    """Build a uniform prior over two bowls and set up their mixes.

    bowl1, bowl2: objects exposing a ``bowl_name`` attribute, which is
        used as the hypothesis key.
    """
    self.bowl1, self.bowl2 = bowl1, bowl2
    Pmf.__init__(self)

    # Both bowls start equally likely.
    for bowl in (bowl1, bowl2):
        self.Set(bowl.bowl_name, 1)
    self.Normalize()

    self.set_mixes()
def __init__(self, hypos):
    """Build a uniform prior over ``hypos`` and record each bowl's mix.

    hypos: sequence of hypotheses (the ``mixes`` table below is keyed by
        'Bowl 1' and 'Bowl 2').
    """
    Pmf.__init__(self)
    for bowl in hypos:
        self.Set(bowl, 1)
    self.Normalize()

    # Fraction of each cookie flavor per bowl.
    self.mixes = {
        'Bowl 1': {'vanilla': 0.75, 'chocolate': 0.25},
        'Bowl 2': {'vanilla': 0.5, 'chocolate': 0.5},
    }
def __init__(self, hypos, bowls):
    """Build a uniform prior over ``hypos`` and wrap each bowl description.

    hypos: sequence of string bowl IDs
    bowls: mapping from bowl ID to the argument passed to ``Bowl(...)``
    """
    Pmf.__init__(self)
    for bowl_id in hypos:
        self.Set(bowl_id, 1)
    self.Normalize()

    # One Bowl object per entry of the incoming mapping.
    self.bowls = {name: Bowl(contents) for name, contents in bowls.items()}
def __init__(self, hypos):
    """Build a uniform prior over ``hypos`` and create the two bowls.

    hypos: sequence of string bowl IDs
    """
    Pmf.__init__(self)
    for bowl_id in hypos:
        self.Set(bowl_id, 1)
    self.Normalize()

    # Cookie counts for each bowl.
    self.bowls = {
        'Bowl 1': Bowl(vanilla=30, chocolate=10),
        'Bowl 2': Bowl(vanilla=20, chocolate=20),
    }
def MakePmf(self, steps=101, label=None):
    """Return a Pmf that discretizes this distribution.

    Usually the PDF is evaluated on an evenly spaced grid over [0, 1] and
    each density is treated as a probability mass.  When alpha or beta is
    below one the PDF goes to infinity at x=0 or x=1, so in that case the
    Pmf is derived from the CDF instead (via MakeCdf().MakePmf()).  That
    result is slightly asymmetric at 0 and 1 but is still a reasonable
    discrete model that behaves well as the number of values increases.

    steps: number of grid points used on the PDF path
    label: label for the Pmf built on the PDF path; falls back to
        ``self.label`` when omitted

    Note: ``steps`` and ``label`` are not forwarded on the CDF path.
    """
    if label is None:
        label = self.label

    # Unbounded density at an endpoint: take differences of the CDF.
    if self.alpha < 1 or self.beta < 1:
        return self.MakeCdf().MakePmf()

    last = steps - 1
    grid = [i / last for i in range(steps)]
    densities = {x: self.EvalPdf(x) for x in grid}
    return Pmf(densities, label=label)
def main():
    """Solve the cookie problem and print the posterior of each bowl."""
    bowls = ('Bowl 1', 'Bowl 2')
    pmf = Pmf()

    # Prior: both bowls equally likely.
    for bowl in bowls:
        pmf.Set(bowl, 0.5)

    # Likelihood of drawing a vanilla cookie from each bowl.
    for bowl, likelihood in zip(bowls, (0.75, 0.5)):
        pmf.Mult(bowl, likelihood)

    pmf.Normalize()

    for bowl in bowls:
        print(pmf.Prob(bowl))
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 17 12:08:20 2018

@author: QJ
"""
# Think Bayes: a fair six-sided die followed by the cookie-problem update.
from thinkbayes2 import Pmf

pmf = Pmf()
for x in [1, 2, 3, 4, 5, 6]:
    pmf.Set(x, 1 / 6.0)

# NOTE(review): the bowl hypotheses are added to the same Pmf as the die
# outcomes and Normalize() is never called, so the values printed below are
# unnormalized masses -- confirm whether a fresh Pmf was intended here.
pmf.Set('Bowl1', 0.5)
pmf.Set('Bowl2', 0.5)

# Multiply each prior by the likelihood of the observed data.
pmf.Mult('Bowl1', 0.75)
pmf.Mult('Bowl2', 0.5)

# Query the same key that was set above ('Bowl1', no space), and use the
# print() function so the script also parses on Python 3.
print(pmf.Prob('Bowl1'))
print(pmf)
# References:
#   http://www.greenteapress.com/thinkbayes/thinkbayes.pdf
#   http://thinkbayes.com/thinkbayes.py
# Setup:
#   python -m pip install scipy numpy matplotlib pandas
from thinkbayes2 import Pmf

pmf = Pmf()

# Prior: both bowls ("tazones") are equally likely.
for bowl in ('tazon1', 'tazon2'):
    pmf.Set(bowl, 0.5)

# Scale each prior by the likelihood of the observation under that bowl.
for bowl, likelihood in (('tazon1', 0.75), ('tazon2', 0.5)):
    pmf.Mult(bowl, likelihood)

pmf.Normalize()
print(pmf.Prob('tazon1'))
def __init__(self, hypos, priors):
    """Set the priors: for each hypothesis H, the corresponding p(H).

    hypos: sequence of hypotheses
    priors: sequence indexed in parallel with ``hypos``; ``priors[i]`` is
        the prior weight of ``hypos[i]``
    """
    Pmf.__init__(self)
    for index, hypo in enumerate(hypos):
        self.Set(hypo, priors[index])
    self.Normalize()
def __init__(self, hypos, alpha=None):
    """Initialize from ``hypos``, optionally re-weighting by a power law.

    hypos: hypotheses, passed straight to ``Pmf.__init__``
    alpha: when given, every hypothesis is re-set to the unnormalized
        weight ``hypo ** (-alpha)``; when None the weights produced by
        ``Pmf.__init__`` are kept

    The distribution is normalized at the end in either case.
    """
    Pmf.__init__(self, hypos)

    use_power_law = alpha is not None
    if use_power_law:
        for candidate in hypos:
            self.Set(candidate, pow(candidate, -alpha))

    self.Normalize()
def __init__(self, sides):
    """Build a fair die whose faces are numbered 1 through ``sides``.

    sides: number of faces
    """
    Pmf.__init__(self)
    faces = range(1, sides + 1)
    for face in faces:
        self.Set(face, 1)
    self.Normalize()
#!/usr/bin/env python
# Cookie problem: start from a prior, apply a likelihood, then normalize,
# printing the distribution after each stage.
import sys

sys.path.append("../lib/ThinkBayes2/code/")
from thinkbayes2 import Pmf

pmf = Pmf()

# Prior distribution
pmf.Set('Bowl1', 0.5)
pmf.Set('Bowl2', 0.5)
print(pmf)

# Update based on new data
pmf.Mult('Bowl1', 0.75)
pmf.Mult('Bowl2', 0.5)
print(pmf)

# The hypotheses are mutually exclusive and collectively exhaustive, so we
# can renormalize.
pmf.Normalize()
print(pmf)  # posterior distribution

# The book covers a more complicated implementation, but it is the same as
# the Monty Hall one.
print("Finished")
from __future__ import print_function, division import sys sys.path.append("../lib/ThinkBayes2/code/") from thinkbayes2 import Suite, Pmf, SampleSum, MakeMixture import thinkplot from simulationDD02 import Die pmf_dice = Pmf() pmf_dice.Set(Die(4), 2) pmf_dice.Set(Die(6), 3) pmf_dice.Set(Die(8), 2) pmf_dice.Set(Die(12), 1) pmf_dice.Set(Die(20), 1) pmf_dice.Normalize() print(pmf_dice) print("#################################################") mix = Pmf() for die, weight in pmf_dice.Items(): for outcome, prob in die.Items(): mix.Incr(outcome, weight * prob) #Shorthand for above #mix = MakeMixture(pmf_dice) print(mix) thinkplot.Hist(mix) thinkplot.Save(root='bar', xlabel='Mixture over a set of dice', ylabel='Probability', formats=['pdf'])
from thinkbayes2 import Pmf

if __name__ == '__main__':
    # Read the whole file and split it into whitespace-separated words.
    # NOTE: the path is absolute and machine-specific.
    with open('/Users/glenn/math/stats/ThinkBayes2/data/ch02_02_text_data.txt') as f:
        txt = f.read().split()

    # print() function form, so the script parses on Python 3.
    print("txt = ", txt)

    # Count each word, then normalize the counts into probabilities.
    pmf = Pmf()
    for word in txt:
        pmf.Incr(word, 1)
    pmf.Normalize()
    pmf.Print()
from thinkbayes2 import Pmf
import thinkplot

# A fair six-sided die.
d6 = Pmf()
for face in range(1, 7):
    d6[face] = 1
d6.Normalize()
print(d6)

# Distribution of the sum of two such dice.
twice = d6 + d6

# Condition on the sum being greater than 3: zero out the sums that are now
# impossible, then renormalize.
for excluded_sum in (2, 3):
    twice[excluded_sum] = 0
twice.Normalize()
print(twice)

thinkplot.Hist(twice)
thinkplot.show()
from thinkbayes2 import Pmf

if __name__ == '__main__':
    # Fair six-sided die: every face gets probability 1/6.
    pmf = Pmf()
    faces = range(1, 7)
    for face in faces:
        pmf.Set(face, 1 / 6.0)
    pmf.Print()
#!/usr/bin/env python
# Builds a Pmf for a fair die, then a word-frequency Pmf from alice.txt.
import sys

sys.path.append("../lib/ThinkBayes2/code/")
from thinkbayes2 import Pmf

# Fair six-sided die.
pmf = Pmf()
for x in [1, 2, 3, 4, 5, 6]:
    pmf.Set(x, 1 / 6.0)
print(pmf)

# Word counts; the context manager closes the file even if counting fails.
words = Pmf()
with open('alice.txt', 'r') as f:
    for line in f:
        for word in line.split():
            words.Incr(word.strip(), 1)

# Turn the raw counts into probabilities.
words.Normalize()
print(words)

print("Finished")
from thinkbayes2 import Pmf, Suite
import thinkplot

# Probability of each die given that the roll came up 6.
dice = Pmf(['4-sided', '6-sided', '8-sided', '12-sided'])
for die_name, likelihood in [
    ('4-sided', 0),
    ('6-sided', 1 / 6),
    ('8-sided', 1 / 8),
    ('12-sided', 1 / 12),
]:
    dice[die_name] *= likelihood
dice.Normalize()
print(dice)

# The same update, with the hypotheses expressed as side counts in a Suite.
suite = Suite([4, 6, 8, 12])
for side_count, likelihood in [(4, 0), (6, 1 / 6), (8, 1 / 8), (12, 1 / 12)]:
    suite[side_count] *= likelihood
suite.Normalize()
print(suite)


class Dice(Suite):
    """Suite of dice hypotheses, keyed by number of sides."""

    def Likelihood(self, data, hypo):
        """Return the likelihood of rolling ``data`` on a ``hypo``-sided die.

        data: the outcome of the roll
        hypo: the number of sides of the die
        """
        # A die cannot show a value larger than its number of sides.
        if data > hypo:
            return 0
        return 1 / hypo
class InfulsContribution(Pmf):
    """Accumulate click data per key and write a ranked JSON result file.

    Each input JSON file is expected to contain ``analythics_data`` (a
    mapping whose values carry ``prof_clicks``) and
    ``meta_data.click_sum``.  Presumably the keys are influencer handles --
    confirm against the data producer.

    Typical use: ``prior(path)`` for the first file (resets the working
    Pmf), then ``posterior(path)`` for each following file.
    """

    def __init__(self, dist_dir):
        """Store the output directory and start with empty state.

        dist_dir: directory the result file is written into
        """
        Pmf.__init__(self)
        self.dist_dir = dist_dir
        self.pmf = None
        self.infs_data = {}
        self.json_data = {}
        self.dist_file = ''

    def prior(self, json_data):
        """Reset the working Pmf, then process ``json_data``; returns self."""
        self.pmf = Pmf()
        self.posterior(json_data)
        return self

    def posterior(self, json_data):
        """Load ``json_data`` (a file path), update, and write the result.

        Returns self so calls can be chained.
        """
        self.set_json_data(json_data)
        self.update_infs_data()
        self.update()
        self.set_dist_file(json_data, self.dist_dir)
        self.output()
        return self

    def output(self):
        """Normalize the working Pmf and save the ranked result; returns self."""
        pmf = self.pmf
        pmf.Normalize()
        dist_data = self.make_result(pmf)
        self.save_file(dist_data)
        return self

    # main methods

    def set_json_data(self, json_data):
        """Parse the JSON file at path ``json_data`` into ``self.json_data``."""
        with open(json_data, 'rb') as f:
            data = json.load(f)
        self.json_data = data

    def update_infs_data(self):
        """Add the current file's click counts to the running totals."""
        datas = self.json_data['analythics_data']
        click_sum = self.json_data['meta_data']['click_sum']
        for d in datas:
            if not self.infs_data.get(d):
                self.infs_data[d] = {'click_sum': 0, 'prof_clicks': 0}
            self.infs_data[d]['click_sum'] += click_sum
            self.infs_data[d]['prof_clicks'] += datas[d]['prof_clicks']

    def set_dist_file(self, src_file, dist_dir):
        """Derive the output path from a date-like token in ``src_file``.

        A ``YYYY-MM-DD`` match is used unless an 8-digit run is also
        present, in which case the 8-digit run wins.  With no match the
        file is named ``result.json``.
        """
        dist_file = ''
        dashed = re.search(r'\d{4}-\d{2}-\d{2}', src_file)
        compact = re.search(r'\d{8}', src_file)
        if dashed:
            dist_file = dashed.group()
        if compact:
            dist_file = compact.group()
        self.dist_file = os.path.join(dist_dir, f'result{dist_file}.json')

    # likelihood methods

    def liKelihood(self, infl):
        """Return accumulated prof_clicks / click_sum for ``infl``.

        Returns 0.0 when no clicks have been accumulated, instead of
        raising ZeroDivisionError.
        """
        totals = self.infs_data[infl]
        if not totals['click_sum']:
            return 0.0
        return totals['prof_clicks'] / totals['click_sum']

    def update(self):
        """Increment the working Pmf by each key's current likelihood."""
        data = self.json_data['analythics_data']
        print('d', data)
        # Allow posterior() to be called without a preceding prior().
        if self.pmf is None:
            self.pmf = Pmf()
        for infl in data:
            self.pmf.Incr(infl, self.liKelihood(infl))

    def make_result(self, pmf):
        """Return a dict of per-key results, ordered by descending rating.

        Each value holds the key's probability in ``pmf`` ("rating") and
        its accumulated ``prof_clicks`` and ``click_sum``.
        """
        ratings = {k: pmf.Prob(k) for k in pmf.Values()}
        ranked = sorted(ratings.items(), key=lambda item: item[1], reverse=True)
        return {
            name: {
                "rating": rating,
                "prof_clicks": self.infs_data[name]['prof_clicks'],
                "click_sum": self.infs_data[name]['click_sum'],
            }
            for name, rating in ranked
        }

    def save_file(self, json_file):
        """Write ``json_file`` (a JSON-serializable object) to ``self.dist_file``."""
        with open(self.dist_file, 'w') as f:
            json.dump(json_file, f, indent=2)
"""This file contains code for use with "Think Bayes",
by Allen B. Downey, available from greenteapress.com

Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""

from __future__ import print_function, division

from thinkbayes2 import Pmf

pmf = Pmf()

# Prior: both bowls equally likely.
pmf.Set('Bowl 1', 0.5)
pmf.Set('Bowl 2', 0.5)

# Multiply each prior by the likelihood of the observation.
pmf.Mult('Bowl 1', 0.75)
pmf.Mult('Bowl 2', 0.5)

pmf.Normalize()

# The lookup method is spelled Prob (capital P), matching Set/Mult/Normalize.
print(pmf.Prob('Bowl 1'))
import sys

sys.path.insert(0, '/Users/carol/python/ThinkBayes2/thinkbayes2/')
import numpy as np
import matplotlib.pyplot as plt
from thinkbayes2 import Pmf, Suite, CredibleInterval, Beta

# PMF for a 6-sided die
pmf = Pmf()
for face in range(1, 7):
    pmf.Set(face, 1 / 6)
print(pmf)

# How to build up a PMF from a list of strings
pmf2 = Pmf()
sample_words = ['a', 'in', 'or', 'to', 'a', 'me', 'in']
for token in sample_words:
    pmf2.Incr(token, 1)
pmf2.Normalize()
print(pmf2)
print("Probability of letter a:", pmf2.Prob('a'))
# Book typo, p. 12: print pmf.Prob('the') should read print(pmf.Prob('the'))

# PMF for the cookie problem
pmf = Pmf()

# Prior:
for bowl in ("Bowl 1", "Bowl 2"):
    pmf.Set(bowl, 0.5)

# Posterior:
# first multiply the prior by the likelihood
pmf.Mult("Bowl 1", 0.75)
def prior(self, json_data):
    """Start from a fresh Pmf, then run ``posterior`` on ``json_data``.

    json_data: passed through unchanged to ``self.posterior``

    Returns self.
    """
    # Discard any previously accumulated distribution.
    self.pmf = Pmf()

    self.posterior(json_data)
    return self
def __init__(self, hypos):
    """Create a uniform distribution over ``hypos``.

    hypos: iterable of hypotheses
    """
    Pmf.__init__(self)

    # Same unnormalized weight for every hypothesis, rescaled by Normalize().
    for each in hypos:
        self.Set(each, 1)
    self.Normalize()
from thinkbayes2 import Pmf

# Cookie problem
# Prior: 50%-50%
cookie = Pmf(['Bowl 1', 'Bowl 2'])

# First draw is vanilla:
#   p(Vanilla | Bowl 1) = 30/40
#   p(Vanilla | Bowl 2) = 20/40
for bowl, p_vanilla in (('Bowl 1', 0.75), ('Bowl 2', 0.5)):
    cookie[bowl] *= p_vanilla

# Normalize; the return value is p(D).
cookie.Normalize()

# Posteriors
print(cookie)

# Suppose we put the first cookie back, stir, choose again from the same
# bowl, and get a chocolate cookie:
#   p(Chocolate | Bowl 1) = 10/40
#   p(Chocolate | Bowl 2) = 20/40
for bowl, p_chocolate in (('Bowl 1', 0.25), ('Bowl 2', 0.5)):
    cookie[bowl] *= p_chocolate

cookie.Normalize()
print(cookie)