"""Dump the attributes and/or the data rows of a database to stdout.

usage: dumpdb.py [all|attrs|data] [database]
"""
import os
import sys

from db import DB

_USAGE = 'usage: dumpdb.py [all|attrs|data] [database]'
_MODES = ('all', 'attrs', 'data')


def main(argv):
    """Run the dump described by *argv* and return the process exit status.

    argv[1] selects what is printed ('attrs', 'data' or 'all') and argv[2]
    is the path handed to DB.open().

    Returns 0 after a successful dump and 1 when the arguments are missing,
    '-h' is given, the mode is not recognised, or the file does not exist.
    """
    if len(argv) < 3 or argv[1] == '-h':
        # Usage text goes to stdout, exactly as the script always did.
        sys.stdout.write(_USAGE + '\n')
        return 1

    mode, path = argv[1], argv[2]

    # An unrecognised mode used to fall through both branches below and exit
    # with status 0 without printing anything; report it instead.
    if mode not in _MODES:
        sys.stderr.write('unknown mode %s\n' % mode)
        sys.stderr.write(_USAGE + '\n')
        return 1

    if not os.path.exists(path):
        sys.stderr.write('file %s does not exist\n' % path)
        return 1

    db = DB()
    db.open(path)

    if mode in ('attrs', 'all'):
        # One "name:val1,val2,..." line per attribute, in sorted key order.
        # NOTE(review): element 1 of each key is what gets printed, so keys
        # are presumably (index, name) pairs -- confirm against DB.attrs().
        for name, vals in sorted(db.attrs().items()):
            sys.stdout.write(name[1] + ':' + ','.join(vals) + '\n')

    if mode in ('data', 'all'):
        # One comma-separated line per data row.
        for row in db.data():
            sys.stdout.write(','.join(row) + '\n')

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
# if all the data classes are the same, then just return that classification if classes.count(classes[0]) == len(classes): return classes[0] best = choosebest(data, attrs) tree = { best.name: { } } for val in best.vals: newdata = [a for a in data if a[best.num] == val] newattrs = [a for a in attrs if a.num != best.num] tree[best.name][val] = gentree(newdata, newattrs) return tree import sys from bz2 import BZ2File import cPickle as pickle from db import DB if __name__ == '__main__': if len(sys.argv) < 3 or sys.argv[1] == '-h': print 'usage: gentree.py [database] [tree]' sys.exit(1) db = DB() db.open(sys.argv[1]) tree = gentree(db.data(), db.attrs()) f = BZ2File(sys.argv[2], 'wb') pickle.dump(tree, f, 2) f.close()