def purity_report():
    """Print an HTML table describing how each topic uses the scored paths.

    Reads the module-level names ``cPathFrame`` (indexed as
    ``[path2num[path], k]``), ``path2num``, ``scored_paths``, ``path2score``
    and ``path2code``, and uses the ``np`` and ``util`` modules.

    One row is written to stdout per column ``k`` of ``cPathFrame``:
    the count-weighted mean score over ``scored_paths``, the number of scored
    paths with a positive count, the summed count, and then the paths in
    descending count order, each with its count, score and code link.

    Returns None; the report is written to stdout only.
    """
    print("<table>")
    print("<br>%d scored paths in model vocab" % len(scored_paths))
    print("<br>showing top-99% scored paths mass per topic")
    print("<thead><th>k <th>score <th>num nonzero types <th>n tok <th>paths")
    print("<tbody>")

    for k in range(cPathFrame.shape[1]):
        print(" ".join(("<tr><td>", str(k))))

        # Look each scored path's count for this column up once and reuse it.
        path_counts = [(p, cPathFrame[path2num[p], k]) for p in scored_paths]
        total = sum([c for _, c in path_counts])
        nnz = sum([c > 0 for _, c in path_counts])
        sumscore = np.sum([path2score[p] * c for p, c in path_counts])
        # With no mass on any scored path the mean is undefined; report NaN
        # rather than dividing by zero.
        meanscore = sumscore / total if total else float("nan")

        print("<td>")
        print("%.3g <td>%d <td>%d" % (meanscore, nnz, total))
        print("<td>")

        # Highest count first; the sort is stable, so ties keep the
        # iteration order of scored_paths.
        ranked = [
            (p, c)
            for p, c in sorted(path_counts, key=lambda pc: -pc[1])
            if c > 0
        ]
        cutoff = 0.99 * total
        cumsum = 0
        for p, count in ranked:
            cumsum += count
            # NOTE(review): the path whose count takes the running total to
            # the cutoff is not printed, so a topic dominated by one path
            # lists nothing -- confirm this is intended.
            if cumsum >= cutoff:
                break
            print(util.nicepath(p))
            s = path2score[p]
            if s > 0:
                valence = "pos"
            elif s < 0:
                valence = "neg"
            elif s == 0:
                valence = "neu"
            else:
                # Reached only when no comparison holds (e.g. a NaN score).
                valence = None
            codeurl = "http://brenocon.com/tabari_cameo_verbs.html#" + path2code[p]
            print("<span class=wordinfo>(%.1f <span class='score %s'>%.1f <a href='%s'>%s</a></span>)</span>" % (
                count, valence, s, codeurl, path2code[p]))
            print(', ')
    print("</table>")
# Emit the per-frame summary table: a heading, the count threshold, the table
# header, and one row per frame in ``frameorder``.  Relies on names defined
# outside this fragment: prefix, wc_thresh, Ndim, frameorder, path_frames,
# path_vocab, framecounts, alpha, etaVar, frame_scales and util.
print("<b>" + prefix + "</b>")
print(" ".join(["only showing paths with count >=", str(wc_thresh)]))
print("<table class=tablesorter cellpadding=3 border=1 cellspacing=0 width='100%'>")
thead = ['eventtype'] + ['count', 'alpha', 'etaVar'] + ['d=%d' % dim for dim in range(Ndim)] + ['top paths']
print(" ".join(["<thead>", ' '.join(["<th>" + x for x in thead]), "</thead>"]))
print("<tbody>")
print("")
for k in frameorder:
    # Indices of the (at most) 20 largest entries in column k, largest first,
    # then only those whose value meets the threshold.
    top_paths = (-path_frames[:, k]).argsort()[:20]
    top_paths = top_paths[path_frames[top_paths, k] >= wc_thresh]
    # Format each surviving path exactly once.
    pathelts = ["%s" % (util.nicepath(path_vocab[i]),) for i in top_paths]
    pathinfo = ', '.join(pathelts)
    row = [
        'f=%s' % k,
        str(framecounts[k]),
        '%.3g' % alpha[k],
        '%.3g' % etaVar[k],
    ]
    row += ["%.3g" % x for x in frame_scales[k, :]]
    row += [pathinfo]
    print('<tr>' + ' '.join('<td>' + str(x) for x in row))
# Stdin-to-stdout filter for tab-separated lines: the last field of every
# line is replaced by util.nicepath(<field>, html=False); the other fields
# are passed through unchanged.
import json
import sys

import util

for raw in sys.stdin:
    # Strip only the newline so empty trailing fields survive the split.
    columns = raw.rstrip('\n').split('\t')
    pretty = util.nicepath(columns[-1], html=False)
    print('\t'.join(columns[:-1] + [pretty]))
# Emit the full frame-summary page: the page header from util.pageheader(),
# a heading, the count threshold, and a table with one row per frame in
# ``frameorder``.  Relies on names defined outside this fragment: prefix,
# wc_thresh, Ndim, frameorder, path_frames, path_vocab, framecounts, alpha,
# etaVar, frame_scales and util.
util.pageheader()
print("<b>" + prefix + "</b>")
print(" ".join(["only showing paths with count >=", str(wc_thresh)]))
print("<table class=tablesorter cellpadding=3 border=1 cellspacing=0 width='100%'>")
thead = ['eventtype'] + ['count', 'alpha', 'etaVar'] + ['d=%d' % dim for dim in range(Ndim)] + ['top paths']
print(" ".join(["<thead>", ' '.join(["<th>" + x for x in thead]), "</thead>"]))
print("<tbody>")
print("")
for k in frameorder:
    # Indices of the (at most) 20 largest entries in column k, largest first,
    # then only those whose value meets the threshold.
    top_paths = (-path_frames[:, k]).argsort()[:20]
    top_paths = top_paths[path_frames[top_paths, k] >= wc_thresh]
    # Format each surviving path exactly once.
    pathelts = ["%s" % (util.nicepath(path_vocab[i]),) for i in top_paths]
    pathinfo = ', '.join(pathelts)
    row = [
        'f=%s' % k,
        str(framecounts[k]),
        '%.3g' % alpha[k],
        '%.3g' % etaVar[k],
    ]
    row += ["%.3g" % x for x in frame_scales[k, :]]
    row += [pathinfo]
    print('<tr>' + ' '.join('<td>' + str(x) for x in row))
print("</tbody>")
print("</table>")