Exemple #1
0
import numpy as np
import matplotlib.pyplot as plt

from thinkbayes2 import Pmf, Suite, CredibleInterval, Beta

# Uniform PMF over the faces of a 6-sided die
pmf = Pmf()
for face in range(1, 7):
    pmf.Set(face, 1 / 6)
print(pmf)

# Build a PMF from a list of strings: count every occurrence, then turn the
# counts into probabilities by normalising
tokens = ['a', 'in', 'or', 'to', 'a', 'me', 'in']
pmf2 = Pmf()
for token in tokens:
    pmf2.Incr(token, 1)
pmf2.Normalize()
print(pmf2)
# Book erratum (p12): print pmf.Prob('the') should read print(pmf.Prob('the'))
prob_a = pmf2.Prob('a')
print("Probability of letter a:", prob_a)

# PMF for the Cookie problem
pmf = Pmf()
# Prior: both bowls are equally likely before any cookie is drawn
for bowl in ("Bowl 1", "Bowl 2"):
    pmf.Set(bowl, 0.5)
# Posterior: first multiply each prior by the likelihood of the observation
# under that hypothesis
likelihoods = {"Bowl 1": 0.75, "Bowl 2": 0.5}
for bowl, likelihood in likelihoods.items():
    pmf.Mult(bowl, likelihood)
# Then normalise (we can do this because the hypotheses are mutually exclusive
# and collectively exhaustive); the Normalize() call itself is not part of
# this excerpt.
Exemple #2
0
from thinkbayes2 import Suite, Pmf, SampleSum, MakeMixture
import thinkplot
from simulationDD02 import Die

# Distribution over which die is used: (number of sides, relative weight).
# The weights are raw counts and are turned into probabilities by Normalize().
dice_weights = [(4, 2), (6, 3), (8, 2), (12, 1), (20, 1)]
pmf_dice = Pmf()
for sides, count in dice_weights:
    pmf_dice.Set(Die(sides), count)
pmf_dice.Normalize()
print(pmf_dice)

print("#################################################")
# Mixture distribution: every outcome of every die contributes
# P(die) * P(outcome | die) to the overall probability of that outcome.
# Equivalent shorthand: mix = MakeMixture(pmf_dice)
mix = Pmf()
for die, die_prob in pmf_dice.Items():
    outcomes = die.Items()
    for face, face_prob in outcomes:
        mix.Incr(face, die_prob * face_prob)
print(mix)

# Plot the mixture as a histogram and save it as bar.pdf
save_options = dict(
    root='bar',
    xlabel='Mixture over a set of dice',
    ylabel='Probability',
    formats=['pdf'],
)
thinkplot.Hist(mix)
thinkplot.Save(**save_options)

print("Program Finished")
class InfulsContribution(Pmf):
    """Rate entries of an analytics JSON file by their share of clicks.

    Each input file is read as JSON and must provide ``analythics_data`` (a
    mapping from a name -- presumably an influencer handle -- to a record with
    a ``prof_clicks`` value) and ``meta_data['click_sum']``.  For every file
    processed, the per-name totals in ``infs_data`` are accumulated, the
    distribution in ``pmf`` is incremented by ``prof_clicks / click_sum`` for
    each name, normalised, and written as JSON into ``dist_dir``.

    Typical use is ``prior(first_file)`` followed by ``posterior(next_file)``
    for each further file; all three pipeline methods return ``self`` so the
    calls can be chained.
    """

    def __init__(self, dist_dir):
        """Create an empty rater that writes its result files to *dist_dir*."""
        super().__init__()
        self.dist_dir = dist_dir
        # Working distribution; created by prior() (or lazily by posterior()).
        self.pmf = None
        # name -> {'click_sum': ..., 'prof_clicks': ...}, accumulated over files.
        self.infs_data = {}
        # Parsed content of the most recently loaded JSON file.
        self.json_data = {}
        # Path of the most recently written result file.
        self.dist_file = ''

    def prior(self, json_data):
        """Start a fresh distribution and process the file *json_data*.

        Only ``pmf`` is reset; totals already accumulated in ``infs_data``
        are kept.  Returns ``self``.
        """
        self.pmf = Pmf()
        self.posterior(json_data)
        return self

    def posterior(self, json_data):
        """Fold the file at path *json_data* into the current distribution.

        Loads the file, accumulates the click totals, updates and normalises
        the distribution, and writes the result file.  Returns ``self``.
        """
        # Allow posterior() without a preceding prior(): previously this
        # failed with AttributeError on None after state was half-updated.
        # Identity check, not truthiness: only a missing distribution should
        # be replaced.
        if self.pmf is None:
            self.pmf = Pmf()
        self.set_json_data(json_data)
        self.update_infs_data()
        self.update()
        self.set_dist_file(json_data, self.dist_dir)
        self.output()
        return self

    def output(self):
        """Normalise ``pmf`` in place and save the result.  Returns ``self``."""
        pmf = self.pmf
        pmf.Normalize()
        dist_data = self.make_result(pmf)
        self.save_file(dist_data)
        return self

    #  main methods
    def set_json_data(self, json_data):
        """Parse the JSON file at path *json_data* into ``self.json_data``."""
        import json

        with open(json_data, 'rb') as f:
            data = json.load(f)
        self.json_data = data

    def update_infs_data(self):
        """Add the loaded file's click counts to the running totals.

        The file-level ``click_sum`` is added to every name listed in
        ``analythics_data``, together with that name's own ``prof_clicks``.
        """
        datas = self.json_data['analythics_data']
        click_sum = self.json_data['meta_data']['click_sum']
        for name, record in datas.items():
            totals = self.infs_data.setdefault(
                name, {'click_sum': 0, 'prof_clicks': 0})
            totals['click_sum'] += click_sum
            totals['prof_clicks'] += record['prof_clicks']

    def set_dist_file(self, src_file, dist_dir):
        """Derive the result path from a date embedded in *src_file*.

        A ``YYYY-MM-DD`` match is used if present; an 8-digit run, if present,
        takes precedence over it.  With no match the file is ``result.json``.
        The path is stored in ``self.dist_file``.
        """
        import os
        import re

        dist_file = ''
        dashed = re.search(r'\d{4}-\d{2}-\d{2}', src_file)
        compact = re.search(r'\d{8}', src_file)
        if dashed:
            dist_file = dashed.group()
        if compact:
            dist_file = compact.group()
        self.dist_file = os.path.join(dist_dir, f'result{dist_file}.json')

    #  likelihood methods
    def liKelihood(self, infl):
        """Return accumulated ``prof_clicks / click_sum`` for *infl*.

        Raises:
            KeyError: if *infl* has no entry in ``infs_data``.
            ZeroDivisionError: if the accumulated ``click_sum`` is zero.
        """
        totals = self.infs_data[infl]
        return totals['prof_clicks'] / totals['click_sum']

    def update(self):
        """Increment ``pmf`` by the likelihood of every name in the loaded file."""
        data = self.json_data['analythics_data']
        print('d', data)  # debug trace of the loaded analytics section
        # A plain loop: the calls are made for their side effect on self.pmf,
        # so there is no list worth building.
        for infl in data:
            self.pmf.Incr(infl, self.liKelihood(infl))

    def make_result(self, pmf):
        """Build the result mapping from *pmf*, highest probability first.

        Returns a dict keyed by name whose values hold ``rating`` (the
        probability from *pmf*) and the accumulated ``prof_clicks`` and
        ``click_sum`` taken from ``infs_data``.
        """
        ratings = {name: pmf.Prob(name) for name in pmf.Values()}
        ranked = sorted(ratings.items(), key=lambda item: item[1], reverse=True)
        return {
            name: {
                "rating": rating,
                "prof_clicks": self.infs_data[name]['prof_clicks'],
                "click_sum": self.infs_data[name]['click_sum'],
            }
            for name, rating in ranked
        }

    def save_file(self, json_file):
        """Write the result mapping *json_file* to ``self.dist_file`` as JSON."""
        import json

        with open(self.dist_file, 'w') as f:
            json.dump(json_file, f, indent=2)
Exemple #4
0
from thinkbayes2 import Pmf

if __name__ == '__main__':
    # Read the whole text file and split it into whitespace-separated words.
    with open('/Users/glenn/math/stats/ThinkBayes2/data/ch02_02_text_data.txt'
              ) as f:
        txt = f.read().split()

    # print() as a function call: the Python 2 print statement used here
    # before is a SyntaxError on Python 3.
    print("txt = ", txt)

    # Count every word occurrence, then normalise the counts to probabilities.
    pmf = Pmf()
    for word in txt:
        pmf.Incr(word, 1)

    pmf.Normalize()

    pmf.Print()
#!/usr/bin/env python
import sys
sys.path.append("../lib/ThinkBayes2/code/")
from thinkbayes2 import Pmf

# Uniform PMF over the faces of a six-sided die.
pmf = Pmf()
for x in [1, 2, 3, 4, 5, 6]:
    pmf.Set(x, 1 / 6.0)
print(pmf)

# Word-frequency PMF of alice.txt: count each whitespace-separated word, then
# normalise the counts into probabilities.
words = Pmf()
# The with-block closes the file once reading is done; it was previously
# opened and never closed.
with open('alice.txt', 'r') as f:
    for line in f:
        # split() with no arguments already discards surrounding whitespace,
        # so the words need no further strip().
        for word in line.split():
            words.Incr(word, 1)
words.Normalize()
print(words)

print("Finished")