Ejemplo n.º 1
0
	def compute_mi(self):
		"""Compute a per-morph score and store it on each entry's ``mi`` attribute.

		For every entry in ``self.morphs`` the score is the sum, over the labels
		of its ``p_given_att`` mapping whose value ``p_g`` is strictly positive,
		of ``p_g * (log(p_g) - log(self.class_probs[label]) - log(entry.p))``
		with natural logarithms. Non-positive values are skipped, so an entry
		without any positive value ends up with ``mi == 0``.

		NOTE(review): the name suggests a mutual-information style measure;
		confirm against how ``p_given_att`` and ``p`` are populated.
		"""
		for _, entry in self.morphs.iteritems():
			# The generator is lazy: the logs are only evaluated for labels that
			# pass the ``p_g > 0`` filter, in the mapping's iteration order.
			entry.mi = sum(
				p_g * (math.log(p_g) - math.log(self.class_probs[label]) - math.log(entry.p))
				for label, p_g in entry.p_given_att.iteritems()
				if p_g > 0
			)


def analyze(users, model, target='gender'):
	"""Segment every user's id, then print all morph entries by descending ``mi``.

	Each user receives a ``segmented`` attribute holding the result of
	``segmenter.morph_segments(model, user.id)`` and is printed immediately
	afterwards. An ``Analyzer`` is then built from ``users`` and ``target``,
	and the values of its ``morphs`` mapping are printed from the highest
	``mi`` to the lowest.
	"""
	for user in users:
		user.segmented = segmenter.morph_segments(model, user.id)
		# A parenthesised single argument prints exactly like the Python 2
		# print statement.
		print(user)

	analyzer = Analyzer(users, target)

	# sorted() is stable just like list.sort(), so entries with equal ``mi``
	# keep the order in which the mapping returned them.
	ranked = sorted(analyzer.morphs.values(), key=lambda entry: entry.mi, reverse=True)
	for entry in ranked:
		print(entry)

if __name__ == '__main__':
	# Script entry point: fetch the users, load the morph model and run the
	# analysis for the chosen target attribute.
	target = 'gender'

	# Only the gender target restricts the query to rows with a non-empty
	# gender; any other target fetches without a where clause.
	if target == 'gender':
		whereclause = 'where gender is not ""'
	else:
		whereclause = ''

	users = segmenter.get_users_from_db(whereclause=whereclause)
	model = segmenter.load_model('../models/idmorphs_pre10naworl.model')
	analyze(users, model, target=target)
Ejemplo n.º 2
0
				semisup_classifier.segfun = None
				cPickle.dump(semisup_classifier, f)
			semisup_classifier.segfun = segfun
		return semisup_classifier


if __name__ == '__main__':
	# Script entry point: builds `cls` through DoTest from the gender-labeled
	# user ids, then collects the ids of the users stored in the 'naworl_test'
	# table, split by gender.
	target = 'gender'
	# Only the gender target restricts the queries to rows with a non-empty gender.
	whereclause = "where gender is not ''" if target == 'gender' else ''
	model = segmenter.load_model('../models/idmorphs_naworl.model')
	segfun = segmenter.morph_segmenter(model, match='[a-z]+')

	# Disabled alternative: a second segmenter built from another model file.
	# model_semi = segmenter.load_model('../models/idmorphs.model')
	# segfun_semi = segmenter.morph_segmenter(model_semi, match='[a-z]+')

	# Labeled users, partitioned by their gender value ('M' / 'F'); users with
	# any other value end up in neither list.
	users = segmenter.get_users_from_db(whereclause=whereclause)
	male_ids = [user.id for user in users if user.gender == 'M']
	female_ids = [user.id for user in users if user.gender == 'F']

	# Disabled alternative: ids of unlabeled users from the 'naver' table.
	# unlabeled_users = segmenter.get_users_from_db(tablename='naver')
	# unknown_ids = [user.id for user in unlabeled_users]
	# No unlabeled ids are passed to DoTest in the active configuration.
	unknown_ids = None
	cls = DoTest(male_ids, female_ids, segfun, unknown_ids, balance=False)

	# Disabled alternative: restore a previously pickled classifier instead.
	# cls = cPickle.load(open("../models/init_nonum_semi3.pkl"))
	# cls.segfun = segfun_semi

	# Ids from the 'naworl_test' table, filtered with the same where clause and
	# partitioned by gender the same way as the labeled users above.
	test_users = segmenter.get_users_from_db(tablename='naworl_test', whereclause=whereclause)
	test_male_ids = [user.id for user in test_users if user.gender == 'M']
	test_female_ids = [user.id for user in test_users if user.gender == 'F']
Ejemplo n.º 3
0
__author__ = 'hee'

import numpy as np
from collections import defaultdict
import cPickle
from datetime import datetime

import segmenter
from structurer import TwoNamecharStateHMM, NumberStateHMM

if __name__ == '__main__':
	# Script entry point: gathers the user ids from both naworl tables, loads
	# the morph model and restores a previously pickled HMM from disk.
	np.set_printoptions(suppress=True, linewidth=200)

	# Disabled alternative source of users:
	# users = segmenter.get_users_from_db(tablename='naver')
	users = list()
	users.extend(segmenter.get_users_from_db(tablename='naworl_train'))
	users.extend(segmenter.get_users_from_db(tablename='naworl_test'))
	ids = [user.id for user in users]

	model = segmenter.load_model('../models/idmorphs_naworl_num.model')

	file_path = "../models/numhmm_nomonend_naworl_bynaworl.pkl"
	# A parenthesised single argument prints exactly like the Python 2 print
	# statement.
	print(file_path)

	# Disabled training step that produced the pickle loaded below:
	# hmm = NumberStateHMM(n_iter=100)
	# hmm.learn(ids, model)
	# with open(file_path, 'w') as f:
	# 	cPickle.dump(hmm, f)
	# 	print file_path

	# Load through a context manager so the file handle is closed
	# deterministically instead of being left to the garbage collector.
	# The mode stays 'r' to match the text-mode 'w' used by the dump above.
	# NOTE: unpickling executes arbitrary code; only load trusted model files.
	with open(file_path, 'r') as f:
		hmm = cPickle.load(f)