def observed_norm(self, observed):
    """Score a document by its balance of positive and negative words.

    ``observed`` is a sparse word vector: a list of (word int, count)
    2-tuples. It is walked with ``topiclib.iterwords``, and every item
    yielded whose word is in ``posi`` (resp. ``negi``) adds one to the
    positive (resp. negative) tally. Both collections are looked up
    outside this function.

    Returns a real number between -2 and 2:
      * 0.0 when neither kind of word was seen,
      * 2.0 when only positive words were seen,
      * -2.0 when only negative words were seen,
      * otherwise a sigmoid-squashed signed ratio rescaled to that range.
    """
    pos_hits = 0
    neg_hits = 0
    for _, token, _ in topiclib.iterwords(observed):
        if token in posi:
            pos_hits += 1
        if token in negi:
            neg_hits += 1

    # One-sided or empty tallies short-circuit; this also keeps the
    # divisions below away from a zero denominator.
    if pos_hits == 0 and neg_hits == 0:
        return 0.0
    if neg_hits == 0:
        return 2.0
    if pos_hits == 0:
        return -2.0

    # Signed "larger over smaller" ratio, shifted so that an even split
    # lands exactly on 0: non-negative when positives dominate or tie,
    # negative when negatives dominate.
    if pos_hits >= neg_hits:
        shifted = float(pos_hits) / neg_hits - 1
    else:
        shifted = -1.0 * float(neg_hits) / pos_hits + 1

    # NOTE(review): presumably logistic_sigmoid maps into (0, 1), which is
    # what makes the rescale below land in (-2, 2) -- confirm in graphlib.
    squashed = graphlib.logistic_sigmoid(shifted)
    return (4 * squashed) - 2  # norm to -2 to 2
def test_iterwords():
    """Check ``lm.iterwords`` against two small sparse documents.

    Each document is a list of (word, count) pairs. The expected output
    has one (position, word, count) triple per word occurrence, with the
    position counting up from 0 across the whole document and the pairs
    visited in their given order.
    """
    cases = [
        (
            [(0, 3), (1, 1)],
            [
                (0, 0, 3),
                (1, 0, 3),
                (2, 0, 3),
                (3, 1, 1),
            ],
        ),
        (
            # Words deliberately out of numeric order: 1, 2, then 0.
            [(1, 3), (2, 2), (0, 1)],
            [
                (0, 1, 3),
                (1, 1, 3),
                (2, 1, 3),
                (3, 2, 2),
                (4, 2, 2),
                (5, 0, 1),
            ],
        ),
    ]
    for document, expected in cases:
        produced = list(lm.iterwords(document))
        assert produced == expected