Example 1
# Merge the German sentiment lexicons WITHOUT the SentiSpin source, infer
# per-source deviations, combine into a single lexicon, and pickle the result.
#
# NOTE(review): the source-name constants (GE, PC, WS, SP), the helper
# functions (lexicon_convert, stats, stats_dash, reweight, reweight_dash,
# posterior, combine) and the pickle import are assumed to be defined
# earlier in the file — confirm against the companion example.
full_lex = lexicon_convert('../../data/merged_lex_hash.pm')

# Remove SentiSpin: drop the SP score from every entry; entries scored
# *only* by SentiSpin are skipped entirely.
lex = {}
for item, vals in full_lex.items():
    if SP in vals:
        if len(vals) == 1:
            continue
        # Build a filtered copy instead of `del vals[SP]`: deleting in
        # place would mutate the shared dict inside full_lex as well.
        vals = {src: score for src, score in vals.items() if src != SP}
    lex[item] = vals

# Normalise each remaining source's scores to a common scale.
rms = stats(lex, (GE, PC, WS))
reweight(lex, rms)
new = stats_dash(lex, (GE, WS))
reweight_dash(lex, new)

# Infer the prior spread and one deviation per source (4 parameters:
# prior + GE + PC + WS), with box bounds to keep them positive.
inf = posterior(lex, (GE, PC, WS), tol=10**-9,
                bounds=((0.001, 10), (0.001, 5), (0.001, 5), (0.001, 5)),
                initial=(0.6, 0.3, 0.3, 0.3))

# Map the optimiser's parameter vector onto named deviations.
deviation = {'prior': inf['x'][0],
             GE: inf['x'][1],
             PC: inf['x'][2],
             WS: inf['x'][3]}

new_lex, weight = combine(lex, deviation)

# Persist the merged lexicon and its overall weight.
with open('../../data/sentimerge_nospin.pk', 'wb') as f:
    pickle.dump((new_lex, weight), f)
Example 2
from lexicon_convert import lexicon_convert
from stats import stats, stats_dash
from reweight import reweight, reweight_dash
from bayes import posterior, combine, save_readable

import pickle

# Keys identifying the four source lexicons inside the merged Perl hash.
GE = 'german'
PC = 'GermanPC'
WS = 'GermanSentiWS'
SP = 'GermanSentiSpin'

# Convert the Perl hash lexicon, then bring every source onto a common
# scale via the two reweighting passes.
lex = lexicon_convert('../../data/merged_lex_hash.pm', verbose=False)
source_rms = stats(lex, (GE, PC, WS, SP), verbose=False)
reweight(lex, source_rms)
dash_rms = stats_dash(lex, (GE, WS, SP), verbose=False)
reweight_dash(lex, dash_rms)

# Snapshot the reweighted lexicon before running inference.
with open('../../data/premerge.pk', 'wb') as f:
    pickle.dump(lex, f)

# Infer the prior plus one deviation per source (5 parameters); the
# starting point comes from a previously converged run.
inf = posterior(
    lex, (GE, PC, WS, SP), tol=10**-9,
    initial=[ 0.52847201,  0.32771543,  0.31697963,  0.44575393,  0.60947068])

# Pair the optimiser's parameter vector with its named slots.
deviation = dict(zip(('prior', GE, PC, WS, SP), inf['x']))

new_lex, weight = combine(lex, deviation)