Beispiel #1
0
 def gen():
     """Yield skip-PMI summary rows for the English UD corpus.

     Each yielded item is a tuple
     ``(lang, k, deptype, mean(pmis), count(pmis))`` where ``pmis`` is the
     value stored under ``deptype`` in the first element returned by
     ``skip_pmi(c, sentences, k, by_deptype=True)``.

     ``max_k`` and ``c`` are not defined here; they are resolved from the
     surrounding scope.
     """
     # Only the 'en' corpus is processed; iterating over every entry of
     # corpora.ud_corpora is intentionally disabled here.
     lang = 'en'
     corpus = corpora.ud_corpora[lang]
     # Materialize once so the same sentences are reused for every k.
     sentences = list(corpus.sentences(fix_content_head=False))
     for k in range(max_k):
         by_deptype, _ = skip_pmi(c, sentences, k, by_deptype=True)
         for deptype, pmis in by_deptype.items():
             yield lang, k, deptype, mean(pmis), count(pmis)
Beispiel #2
0
def hdmi_sweep():
    """Collect per-language MI values and counts into a DataFrame.

    For every ``(lang, corpus)`` in ``corpora.ud_corpora`` the sentences are
    materialized once and the following entries are recorded:

    * ``hd_mi`` / ``gd_mi`` / ``ss_mi``: results of ``hdmi`` / ``gdmi`` /
      ``ssmi`` called with ``cond.get_pos``.
    * ``hd_n`` / ``gd_n`` / ``ss_n``: ``count`` of ``hd`` / ``gd`` / ``ss``
      called with ``cond.nothing``.
    * ``a_<i>_mi`` / ``a_<i>_n`` for ``i`` in ``0..24``: ``adjacents_mi`` with
      ``cond.get_pos`` and ``count`` of ``adjacents`` with ``cond.nothing``.

    Returns:
        A DataFrame with one row per language (the transposed dict of
        dicts) and an extra ``lang`` column copied from the index.
    """
    stats_by_lang = {}
    for lang, corpus in corpora.ud_corpora.items():
        # A list, so the sentences can be traversed by every measure below.
        sentences = list(corpus.sentences(fix_content_head=False))

        # Keys are inserted in the same order as the measures are computed.
        stats = {}
        stats['hd_mi'] = hdmi(cond.get_pos, sentences)
        stats['hd_n'] = count(hd(cond.nothing, sentences))
        stats['gd_mi'] = gdmi(cond.get_pos, sentences)
        stats['gd_n'] = count(gd(cond.nothing, sentences))
        stats['ss_mi'] = ssmi(cond.get_pos, sentences)
        stats['ss_n'] = count(ss(cond.nothing, sentences))
        stats_by_lang[lang] = stats

        for idx in range(25):
            stats['a_%d_mi' % idx] = adjacents_mi(cond.get_pos, sentences, idx)
            adjacent_items = adjacents(cond.nothing, sentences, idx)
            stats['a_%d_n' % idx] = count(adjacent_items)

    # Outer keys become columns in the constructor; transpose so that each
    # language is a row.
    table = pd.DataFrame(stats_by_lang).T
    table['lang'] = table.index
    return table
Beispiel #3
0
 def entropy(self, choices):
     """Return ``log(count(choices))``.

     NOTE(review): if ``count`` yields the number of items in ``choices``,
     this is the entropy of a uniform choice among them; the log base
     depends on which ``log`` is in scope -- confirm against the imports.
     """
     num_choices = count(choices)
     return log(num_choices)
Beispiel #4
0
 def entropy(self, choices):
     """Return ``log(count(choices))``.

     NOTE(review): if ``count`` yields the number of items in ``choices``,
     this is the entropy of a uniform choice among them; the log base
     depends on which ``log`` is in scope -- confirm against the imports.
     """
     num_choices = count(choices)
     return log(num_choices)