コード例 #1
0
ファイル: models.py プロジェクト: ldmt-muri/morpholm
class SwitchingMorphoProcess:
    def __init__(self, word_model, stem_model, pattern_model, analyses):
        self.word_model = word_model
        self.mp = MorphoProcess(stem_model, pattern_model, analyses)
        self.switch_model = BetaBernouilli(1.0, 1e6)
        self.analyses = analyses
        self.switches = defaultdict(list)

    def increment(self, k):
        p_word, p_mp = self.probs(k)
        x = random.random() * (p_word + p_mp)
        if x < p_word:
            self.word_model.increment(k)
            switch = True
        else:
            self.mp.increment(k)
            switch = False
        self.switch_model.increment(switch)
        self.switches[k].append(switch)

    def decrement(self, k):
        switch = remove_random(self.switches[k])
        if switch:
            self.word_model.decrement(k)
        else:
            self.mp.decrement(k)
        self.switch_model.decrement(switch)

    def probs(self, k):
        p = self.switch_model.p
        p_word = p * self.word_model.prob(k)
        p_mp = 0 if len(self.analyses[k]) == 0 else (1 - p) * self.mp.prob(k)
        return (p_word, p_mp)

    def prob(self, k):
        return sum(self.probs(k))

    def log_likelihood(self, full=False):
        return (self.word_model.log_likelihood(full=full)
                + self.mp.log_likelihood(full=full)
                + self.switch_model.log_likelihood(full=full))

    def resample_hyperparemeters(self, n_iter):
        logging.info('Resampling word model hyperparameters')
        a1, r1 = self.word_model.resample_hyperparemeters(n_iter)
        logging.info('Resampling mp hyperparameters')
        a2, r2 = self.mp.resample_hyperparemeters(n_iter)
        return (a1+a2, r1+r2)

    def __repr__(self):
        return ('Switching[{self.word_model}+{self.mp}'
                '|switch={self.switch_model}]').format(self=self)
コード例 #2
0
ファイル: model.py プロジェクト: ldmt-muri/morpholm
class MorphoAlignmentModel(AlignmentModel):
    def __init__(self, n_source, stem_base, pattern_model, analyses):
        """AlignmentModel(n_source, t_base) -> morpho-alignment model
        n_source: size of the source vocabulary
        stem_base: t-table MP stem base (G_s^0)
        pattern_model: t-table MP pattern model (G_p)
        analyses: t-table MP analyses"""
        self.null = BetaBernouilli(1.0, 1.0) # p(NULL) ~ Beta(1, .)
        self.a_table = AlignmentDistribution(GammaPrior(1.0, 1.0, 4.0))
        self.stem_base = stem_base # G_s^0
        self.pattern_model = pattern_model
        def make_t_word():
            stem_model = PYP(stem_base, PYPPrior(1.0, 1.0, 1.0, 1.0, 0.1, 1.0)) # G_s
            mp = MorphoProcess(stem_model, self.pattern_model, analyses)
            return PYP(mp, PYPPrior(1.0, 1.0, 1.0, 1.0, 0.1, 1.0)) # G_w
        self.t_table = [make_t_word() for _ in xrange(n_source)]

    def log_likelihood(self):
        return (sum((t_word.log_likelihood() # G_w
                   + t_word.prior.log_likelihood() # d_w, T_w
                   + t_word.base.stem_model.log_likelihood() # G_s
                   + t_word.base.stem_model.prior.log_likelihood()) # d_s, T_s
                    for t_word in self.t_table)
                + self.pattern_model.log_likelihood(full=True) # G_p + ...
                + self.stem_base.log_likelihood(full=True) # G_s^0 + ...
                + self.null.log_likelihood()
                + self.a_table.log_likelihood() + self.a_table.scale_prior.log_likelihood())

    def resample_hyperparemeters(self, n_iter):
        ar = stuple((0, 0))
        logging.info('Resampling stem base / pattern model hyperparameters')
        ar += self.pattern_model.resample_hyperparemeters(n_iter) # G_p
        ar += self.pattern_model.base.resample_hyperparemeters(n_iter) # G_p^0
        ar += self.stem_base.resample_hyperparemeters(n_iter) # G_s^0
        logging.info('Resampling t-table word and stem hyperparameters')
        for t_word in self.t_table:
            ar += t_word.resample_hyperparemeters(n_iter) # G_w
            ar += t_word.base.stem_model.resample_hyperparemeters(n_iter) # G_s
        logging.info('Resampling alignment distribution scale parameter')
        ar += self.a_table.resample_hyperparemeters(n_iter)
        return ar

    def __repr__(self):
        return ('MorphoAlignmentModel(#source words={n_source} '
                '| t-table[f] ~ PYP(base=MP(stem ~ PYP(base={self.stem_base}); '
                'pattern ~ {self.pattern_model}))'
                '| a-table ~ {self.a_table} + p(NULL)={self.p_null} ~ {self.null})'
                ).format(self=self, n_source=len(self.t_table))