nulhash = int(pyhashxx.hashxx('\0' * 4096)/10) freq = {} tables = [] fi = 0 frqc = 0 if os.path.isfile("dumps/dumps.cache"): print "Loading hashlist... (cached)" (frqc, tables) = cPickle.load(open("dumps/dumps.cache", "rb")) else: print "Loading hashes and purging sole occurences" for file in sorted(glob.glob("dumps/*.dump")): tp = cPickle.load(open(file, "rb")) table = UserDict(tp) table.filename = os.path.basename(file).replace(".dump","") table.idx = fi fi += 1 table.memory = (768,1024)[random.randint(0,1)] tables.append(table) for k,v in table.items(): freq[k] = freq.get(k, 0) + v print("Length of table %s: %d" % (file, len(table))) print("Length of freqtable: %s" % len(freq)) for k,v in freq.items(): if v == 1 or k == nulhash: del freq[k] for table in tables: if k in table: