Ejemplo n.º 1
0
def purity_report():
    """Print an HTML table summarising scored-path mass for each topic.

    One row is written per column ``k`` of ``cPathFrame``.  Each row shows
    the count-weighted mean of ``path2score`` over ``scored_paths``, how many
    of those paths have a positive count in the column, the column's total
    count over those paths, and the highest-count paths themselves.

    Paths are listed in descending count order.  Listing stops as soon as
    the running count reaches 99% of the column total; the path that reaches
    that threshold is itself not printed.

    Reads the module-level names ``cPathFrame``, ``path2num``,
    ``path2score``, ``path2code`` and ``scored_paths`` and formats paths with
    ``util.nicepath``.  Everything is written to stdout; nothing is returned.
    """
    print("<table>")
    print("<br>%d scored paths in model vocab" % len(scored_paths))
    print("<br>showing top-99% scored paths mass per topic")
    print("<thead><th>k <th>score <th>num nonzero types <th>n tok <th>paths")
    print("<tbody>")

    paths = list(scored_paths)
    n_topics = cPathFrame.shape[1]
    for k in range(n_topics):
        print("<tr><td> %s" % (k,))

        # Count of every scored path in topic k, looked up once per topic.
        weights = [cPathFrame[path2num[p], k] for p in paths]
        total = sum(weights)
        nnz = sum([w > 0 for w in weights])
        sumscore = np.sum([path2score[p] * w for p, w in zip(paths, weights)])
        meanscore = sumscore / total

        print("<td>")
        print("%.3g <td>%d <td>%d" % (meanscore, nnz, total))

        print("<td>")
        # Largest counts first (stable for ties); zero-count paths are dropped.
        ranked = sorted(zip(paths, weights), key=lambda pw: -pw[1])
        ranked = [(p, w) for p, w in ranked if w > 0]

        cutoff = 0.99 * total
        running = 0
        for path, count in ranked:
            running += count
            if running >= cutoff:
                # The path that crosses the cutoff is not listed.
                break

            print(util.nicepath(path))

            score = path2score[path]
            if score > 0:
                valence = "pos"
            elif score < 0:
                valence = "neg"
            elif score == 0:
                valence = "neu"
            else:
                # Reached only when the score compares neither above, below
                # nor equal to zero.
                valence = None

            code = path2code[path]
            codeurl = "http://brenocon.com/tabari_cameo_verbs.html#" + code
            print("<span class=wordinfo>(%.1f <span class='score %s'>%.1f <a href='%s'>%s</a></span>)</span>" % (
                count, valence, score, codeurl, code))
            print(', &nbsp; ')
    print("</table>")
Ejemplo n.º 2
0
# Write an HTML report to stdout: a bold title, the count threshold in use,
# and the opening of a sortable table with one row per frame in `frameorder`.
# Relies on names defined outside this fragment: prefix, wc_thresh, Ndim,
# frameorder, path_frames, path_vocab, framecounts, alpha, etaVar,
# frame_scales and util.nicepath.
print("<b>" + prefix + "</b>")
print("only showing paths with count >= %s" % (wc_thresh,))

print("<table class=tablesorter cellpadding=3 border=1 cellspacing=0 width='100%'>")

# Header: fixed statistics, one column per dimension, then the path list.
thead = (['eventtype', 'count', 'alpha', 'etaVar']
         + ['d=%d' % dim for dim in range(Ndim)]
         + ['top paths'])
print("<thead> %s </thead>" % ' '.join(["<th>" + x for x in thead]))
print("<tbody>")
print("")

for k in frameorder:
    # Indices of the (at most) 20 largest counts in column k, largest first,
    # then restricted to those meeting the display threshold.
    top_paths = (-path_frames[:, k]).argsort()[:20]
    top_paths = top_paths[path_frames[top_paths, k] >= wc_thresh]

    # Format each surviving path exactly once; "%s" keeps the coercion to
    # text that the original applied to util.nicepath's result.
    pathelts = ["%s" % (util.nicepath(path_vocab[i]),) for i in top_paths]
    pathinfo = ',&nbsp; '.join(pathelts)

    row = [
        'f=%s' % k,
        str(framecounts[k]),
        '%.3g' % alpha[k],
        '%.3g' % etaVar[k],
    ]
    row += ["%.3g" % x for x in frame_scales[k, :]]
    row += [pathinfo]
    # Every cell is already text, so cells are joined directly.
    print('<tr>' + ' '.join('<td>' + cell for cell in row))
Ejemplo n.º 3
0
import json, sys
import util

# Echo every stdin line to stdout with its last tab-separated field replaced
# by util.nicepath(field, html=False); earlier fields pass through unchanged.
for line in sys.stdin:
    # rpartition yields ('', '', whole_line) when the line has no tab, so a
    # single-field line is passed to nicepath in full.
    head, tab, last = line.rstrip('\n').rpartition('\t')
    print(head + tab + util.nicepath(last, html=False))
# Write a complete HTML report to stdout: page header, bold title, the count
# threshold in use, and a sortable table with one row per frame in
# `frameorder`.  Relies on names defined outside this fragment: prefix,
# wc_thresh, Ndim, frameorder, path_frames, path_vocab, framecounts, alpha,
# etaVar and frame_scales.
util.pageheader()

print("<b>" + prefix + "</b>")
print("only showing paths with count >= %s" % (wc_thresh,))

print("<table class=tablesorter cellpadding=3 border=1 cellspacing=0 width='100%'>")

# Header: fixed statistics, one column per dimension, then the path list.
thead = (['eventtype', 'count', 'alpha', 'etaVar']
         + ['d=%d' % dim for dim in range(Ndim)]
         + ['top paths'])
print("<thead> %s </thead>" % ' '.join(["<th>" + x for x in thead]))
print("<tbody>")
print("")

for k in frameorder:
    # Indices of the (at most) 20 largest counts in column k, largest first,
    # then restricted to those meeting the display threshold.
    top_paths = (-path_frames[:, k]).argsort()[:20]
    top_paths = top_paths[path_frames[top_paths, k] >= wc_thresh]

    # Format each surviving path exactly once; "%s" keeps the coercion to
    # text that the original applied to util.nicepath's result.
    pathelts = ["%s" % (util.nicepath(path_vocab[i]),) for i in top_paths]
    pathinfo = ',&nbsp; '.join(pathelts)

    row = [
        'f=%s' % k,
        str(framecounts[k]),
        '%.3g' % alpha[k],
        '%.3g' % etaVar[k],
    ]
    row += ["%.3g" % x for x in frame_scales[k, :]]
    row += [pathinfo]
    # Every cell is already text, so cells are joined directly.
    print('<tr>' + ' '.join('<td>' + cell for cell in row))

print("</tbody>")
print("</table>")