# Tail of a per-page tally followed by a TSV export of page-level word counts.
#
# Tally: a word beginning with "#" has its count reset to 0, then count is
# added to pagedict[page] (the entry is created the first time a page is seen).
# htid is filename minus its last seven characters, and filedict[htid] is set
# to the page dictionary.
#
# Export: opens the hard-coded pagelevelwordcounts.tsv path for writing as
# UTF-8 and writes the header "htid<TAB>page<TAB>wordcount". For every
# (htid, pagedict) item in filedict -- the loop rebinds both names -- the pairs
# returned by utils.sortvaluesbykey(pagedict) are walked with counter starting
# at 0: a negative pagenum is skipped, a pagenum different from counter prints
# "pagination anomaly", otherwise counter is incremented; a row
# htid<TAB>pagenum<TAB>count is built and written to the file.
#
# NOTE(review): line breaks and indentation were lost for this statement run,
# so the nesting cannot be read from here: the loops that bind word, page,
# count and filename are not visible, and it is unclear whether the row is
# written for every non-negative page or only in the else branch. Confirm
# against the canonical source before reformatting.
# NOTE(review): utils.sortvaluesbykey is not visible here; presumably it
# returns (page, count) pairs ordered by page number -- confirm.
# NOTE(review): counter advances only when pagenum == counter, so after one
# mismatch later pages may all report an anomaly -- confirm this is intended.
# NOTE(review): the same statement sequence appears on the following line
# (only the spacing inside open(...) differs); looks like an accidental
# duplicate -- confirm which copy should be kept.
if word.startswith("#"): count = 0 if page in pagedict: pagedict[page] += count else: pagedict[page] = count htid = filename[0:-7] filedict[htid] = pagedict with open( "/Users/tunder/Dropbox/pagedata/seventhfeatures/pagelevelwordcounts.tsv", mode="w", encoding="utf-8") as f: f.write("htid\tpage\twordcount\n") for htid, pagedict in filedict.items(): tuplelist = utils.sortvaluesbykey(pagedict) counter = 0 for twotuple in tuplelist: pagenum, count = twotuple if pagenum < 0: continue elif pagenum != counter: print("pagination anomaly") else: counter += 1 outline = htid + "\t" + str(pagenum) + "\t" + str(count) + "\n" f.write(outline)
# Tail of a per-page tally followed by a TSV export of page-level word counts.
#
# Tally: a word beginning with "#" has its count reset to 0, then count is
# added to pagedict[page] (the entry is created the first time a page is seen).
# htid is filename minus its last seven characters, and filedict[htid] is set
# to the page dictionary.
#
# Export: opens the hard-coded pagelevelwordcounts.tsv path for writing as
# UTF-8 and writes the header "htid<TAB>page<TAB>wordcount". For every
# (htid, pagedict) item in filedict -- the loop rebinds both names -- the pairs
# returned by utils.sortvaluesbykey(pagedict) are walked with counter starting
# at 0: a negative pagenum is skipped, a pagenum different from counter prints
# "pagination anomaly", otherwise counter is incremented; a row
# htid<TAB>pagenum<TAB>count is built and written to the file.
#
# NOTE(review): line breaks and indentation were lost for this statement run,
# so the nesting cannot be read from here: the loops that bind word, page,
# count and filename are not visible, and it is unclear whether the row is
# written for every non-negative page or only in the else branch. Confirm
# against the canonical source before reformatting.
# NOTE(review): utils.sortvaluesbykey is not visible here; presumably it
# returns (page, count) pairs ordered by page number -- confirm.
# NOTE(review): counter advances only when pagenum == counter, so after one
# mismatch later pages may all report an anomaly -- confirm this is intended.
# NOTE(review): the same statement sequence also appears on the preceding line
# (only the spacing inside open(...) differs); looks like an accidental
# duplicate -- confirm which copy should be kept.
if word.startswith("#"): count = 0 if page in pagedict: pagedict[page] += count else: pagedict[page] = count htid = filename[0:-7] filedict[htid] = pagedict with open("/Users/tunder/Dropbox/pagedata/seventhfeatures/pagelevelwordcounts.tsv", mode="w", encoding="utf-8") as f: f.write("htid\tpage\twordcount\n") for htid, pagedict in filedict.items(): tuplelist = utils.sortvaluesbykey(pagedict) counter = 0 for twotuple in tuplelist: pagenum, count = twotuple if pagenum < 0: continue elif pagenum != counter: print("pagination anomaly") else: counter += 1 outline = htid + "\t" + str(pagenum) + "\t" + str(count) + "\n" f.write(outline)