def prune(L, minsup, k):
    """Remove, in place, every pattern in ``L`` whose support is below ``minsup``.

    A pattern is dropped immediately when ``pattern.support()`` is already
    below ``minsup``.  Otherwise its time intervals are clustered through the
    module-level ``km`` object, evidence that falls into a cluster with fewer
    than ``minsup`` members is removed from the pattern, and the pattern is
    dropped if its support has fallen below ``minsup`` as a result.

    Args:
        L: dict mapping a name to a pattern object.  Patterns must provide
            ``support()``, ``timeintervals()`` (returning a
            ``(timeintervals, evids)`` pair) and ``removeEvidence(key, value)``.
            The dict is mutated in place.
        minsup: minimum support; also used as the minimum cluster size.
        k: forwarded unchanged as the second argument of ``km.build``
            (presumably the number of clusters -- confirm against ``km``).

    Returns:
        None.  ``L`` is modified in place.
    """
    # Iterate over a snapshot: entries are deleted from L inside the loop, and
    # deleting from a dict while iterating its live items() view raises
    # RuntimeError on Python 3.  On Python 2 items() already returns a list,
    # so the extra list() does not change behaviour there.
    for name, pattern in list(L.items()):
        if pattern.support() < minsup:
            del L[name]
            continue

        timeintervals, evids = pattern.timeintervals()
        # Use len() instead of truthiness so array-like containers keep working.
        if len(timeintervals) == 0:
            continue

        km.build(timeintervals, k)
        # remove unsatisfied evidence
        kluster = km.getClusters()
        # km.cluster is zipped with evids, i.e. it is expected to hold one
        # cluster id per evidence entry, usable as an index into kluster.
        for evidence, cluster_id in zip(evids, km.cluster):
            if len(kluster[cluster_id]) < minsup:
                for key, value in evidence.items():
                    pattern.removeEvidence(key, value)

        # Removing evidence may have lowered the support: prune by cluster.
        if pattern.support() < minsup:
            del L[name]
def main():
    """Generate a synthetic spatial data set, cluster it with ``km`` and plot it.

    Data comes from ``spatio_data_generator`` with three seeds and 50 points
    per seed.  ``km.build`` is then run on it, after which the cluster
    assignment and the size of each of the three clusters are printed and the
    result is plotted.
    """
    data = spatio_data_generator(
        territory={'x': 1000, 'y': 1000},
        seeds=[{'x': 200, 'y': 200}, {'x': 700, 'y': 700}, {'x': 700, 'y': 400}],
        pts=[50, 50, 50],
        noise=30,
        eps=100,
    )

    # Disabled experiments kept for reference (rtree index / opt clustering).
    # x = []
    # y = []
    # start = time.time()
    # rt = rtree.RTree(1, 10)
    # for instance in data:
    #     rt.insert(rtree.Box(instance, [0,0]))
    # end = time.time()
    # timer = end-start
    # print rt.root
    # print "build tree completed in %.2fs" % timer
    #
    # query = rt.search(rtree.Box([200, 200], [100, 100]))
    # print len(query)
    # print query
    #
    # start = time.time()
    # opt.build(data, 200, 10)
    # end = time.time()
    #
    # print "===== Finished in %.2fs=====" % (end-start)
    #
    # n = opt.cluster(200)
    # opt.report()
    # opt.plot()
    # opt.plotc(n)
    # print n
    # opt.plotr(400)

    km.build(data, 3, 10, 1000)
    # Single-argument parenthesised print gives the same output on Python 2
    # (print statement with a parenthesised expression) and Python 3.
    print(km.cluster)
    # km.plot()
    clusters = km.getClusters()
    # Indices 0..2 match the three clusters requested from km.build above.
    print("%d clusters: %d and %d and %d" % (
        len(clusters), len(clusters[0]), len(clusters[1]), len(clusters[2])))
    km.plot()