Ejemplo n.º 1
0
# Shannon entropy, in bits, of the distribution [p1, p2].
# p1 and p2 must already be defined before this point.
p_lst = [p1, p2]
# Skip zero probabilities: the 0*log(0) term is taken as 0 by convention,
# whereas math.log(0, 2) would raise ValueError. Negative values are not
# filtered, so invalid input still fails loudly.
h = -sum(x * math.log(x, 2) for x in p_lst if x != 0)
print(h)

# Scratch check: base-2 logarithm of 2/3.
print(math.log(2.0 / 3, 2))

# H(C|T) = p(T1)*H(C|T1)+p(T2)*H(C|T2)
# p1 = p(C1|T1)
# p2 = p(C2|T2)
# H(C|T1) = -(p1*log(p1))
# Information gain
# IG = H(C) - H(C|T)
# H(C) -

# Toy corpus: four two-term documents, two per category.
d1 = {'content': 'c d', 'category': 'c1'}
d2 = {'content': 'b d', 'category': 'c1'}
d3 = {'content': 'a d', 'category': 'c2'}
d4 = {'content': 'a c', 'category': 'c2'}

# Per-term statistics tallied by hand from d1..d4:
#   category_count[c] -- documents of category c that contain the term
#   document_count    -- documents (any category) that contain the term
# 'a' occurs only in d3 and d4 (both c2); 'd' occurs in d1 and d2 (c1) and
# in d3 (c2). Keep this table in sync with the documents above.
terms = {
    'a': {'category_count': {'c1': 0, 'c2': 2}, 'document_count': 2},
    'b': {'category_count': {'c1': 1, 'c2': 0}, 'document_count': 1},
    'c': {'category_count': {'c1': 1, 'c2': 1}, 'document_count': 2},
    'd': {'category_count': {'c1': 2, 'c2': 1}, 'document_count': 3},
}

# Number of documents in each category.
categories = {'c1': 2, 'c2': 2}

# Run term selection on the toy statistics and print the resulting
# selected_terms attribute.
classifier = Classifier()
classifier.select(data={'terms': terms, 'categories': categories})
# Call form of print: same output on Python 2, valid syntax on Python 3.
print(classifier.selected_terms)