def main(args):
    """Multi-input address clustering driver.

    Scans the chain in fixed-size height windows, fans each window out to a
    process pool, unions every co-spending address pair into a union-find
    structure, then persists the result via ``db_write``.

    NOTE(review): relies on module-level helpers not visible in this chunk
    (``dq``, ``uf``, ``ClusterDB``, ``one_time_change``, ``db_write``,
    ``multiprocessing``, ``time``) — confirm their contracts against the
    importing module.

    Args:
        args: parsed CLI arguments; only ``args.dbpath`` is read here.
    """
    term = 10000                                   # block-height window per pool batch
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2    # use half the cores for workers
    cdq = ClusterDB(args.dbpath)
    stime = time.time()
    # One union-find slot per address id; +1 because the max id is inclusive.
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        # Pair each window start with its end: [start, start+term), shifted by term.
        for sheight, eheight in zip(range(start_height, end_height, term), \
                                    range(start_height+term, end_height+term, term)):
            if eheight >= end_height:
                # Final (partial) window: +1 so the half-open range below
                # still covers end_height itself.
                eheight = end_height + 1
            with multiprocessing.Pool(pool_num) as p:
                # imap keeps results streaming in height order without
                # materializing the whole window at once.
                result = p.imap(one_time_change, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        print(addr_1, ',', addr_2)
                        u.union(int(addr_1), int(addr_2))
            etime = time.time()
            #print('height: {}, time:{}'.format(eheight, etime-stime))
        # Rank array is only needed while unioning; drop it to shrink memory
        # before the (large) DB write.
        # NOTE(review): placement of the two lines below (after the loop vs.
        # inside it) is ambiguous in the original formatting — after-the-loop
        # is assumed here; verify against the original file.
        del u.rank
        db_write(stime, cdq, u)
    except KeyboardInterrupt:
        # Best-effort: persist whatever was already queued before exiting.
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()
def main(batch_size=10000):
    """Populate the cluster table from the source address DB.

    Copies every address row from the source DB (module-level ``dq``) into
    the cluster DB in transactional batches, printing progress per batch.

    Args:
        batch_size: number of address rows copied per transaction. Defaults
            to 10000, matching the previously hard-coded chunk size, so
            existing callers are unaffected.

    NOTE(review): depends on module-level ``dq``, ``ClusterDB`` and ``time``
    from the importing module.
    """
    start_addr = 0
    end_addr = dq.get_max_address()
    print("max_addr:", end_addr)
    cdq = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    cdq.create_cluster_table()
    stime = time.time()
    for i in range(start_addr, end_addr, batch_size):
        # One transaction per batch: keeps commits cheap but bounded.
        cdq.begin_transactions()
        addr_list = dq.get_addr_many(i, i + batch_size)
        cdq.insert_cluster_many(addr_list)
        etime = time.time()
        print('addr index: {}, time:{}'.format(i, etime-stime))
        cdq.commit_transactions()
def main(args):
    """Union-find clustering from a pre-extracted CSV of address pairs.

    Reads ``(first, second)`` address-id pairs from ``args.csv_file``, unions
    them, then writes the resulting (address, cluster-id) table to a CSV via
    pandas.

    NOTE(review): relies on module-level helpers not visible in this chunk
    (``read_csv``, ``ClusterDB``, ``uf``, ``pd``, ``time``).
    """
    count = 0
    stime = time.time()
    csv_list = read_csv(args.csv_file)
    etime = time.time()
    # NOTE(review): ``cdq`` is opened but never used below — presumably left
    # over from an earlier revision (see the commented-out line); confirm
    # before removing, since constructing it may create the DB file.
    cdq = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    print("DEBUG:", csv_list[0], etime - stime)
    print("START UNION FIND")
    stime = time.time()
    #u = uf.UnionFind(int(cdq.get_max_address())+1)
    # Fixed capacity of 200M slots instead of querying the DB for the max id.
    u = uf.UnionFind(int(200000000))
    etime = time.time()
    print(f"MAKE ADDRESS END, TOTAL TIME:{etime - stime}")
    for first, second in csv_list:
        u.union(first, second)
        # Progress heartbeat every 10M unions (also fires once at count == 0).
        if count % 10000000 == 0:
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}")
        count += 1
    etime = time.time()
    print(f"UNION FIND END TOTAL TIME:{etime - stime}")
    # Rank array no longer needed once all unions are done; free the memory.
    del u.rank
    print("START CLUSTERING")
    stime = time.time()
    count = 0
    addr_list = list()
    # Resolve every slot to its root so each address maps to a canonical
    # cluster number. NOTE(review): ``u.find(cluster)`` is called on the
    # stored parent rather than on ``index`` — equivalent iff find() is
    # idempotent from the parent; verify against the UnionFind implementation.
    for index, cluster in enumerate(u.par):
        addr_list.append((str(index), u.find(cluster)))
    # Parent array fully consumed; free it before building the DataFrame.
    del u.par
    df = pd.DataFrame(
        addr_list,
        columns=['addr', 'number'],
    )
    df.to_csv('/home/dnlab/DataHDD/cluster_result/40man.csv', index=False)
    etime = time.time()
    print(f"CLUSTERING END:{etime - stime}")
def main(args):
    """Union-find clustering from a CSV, persisted to a cluster DB in batches.

    Same union phase as the CSV variant above, but instead of one big pandas
    CSV the (address, cluster-id) rows are inserted into ``ClusterDB`` in
    batched transactions to bound memory.

    NOTE(review): relies on module-level helpers not visible in this chunk
    (``read_csv``, ``ClusterDB``, ``uf``, ``time``).
    """
    count = 0
    stime = time.time()
    csv_list = read_csv(args.csv_file)
    etime = time.time()
    cdq = ClusterDB('/home/dnlab/DataHDD/database/multi-input25man.db')
    cdq.create_cluster_table()
    print("DEBUG:", csv_list[0], etime - stime)
    print("START UNION FIND")
    stime = time.time()
    #u = uf.UnionFind(int(dq.get_max_address())+1)
    # Fixed capacity of 90M slots instead of querying the DB for the max id.
    u = uf.UnionFind(90000000)
    etime = time.time()
    print(f"MAKE ADDRESS END, TOTAL TIME:{etime - stime}")
    for first, second in csv_list:
        u.union(first, second)
        # Progress heartbeat every 10M unions (also fires once at count == 0).
        if count % 10000000 == 0:
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}")
        count += 1
    etime = time.time()
    print(f"UNION FIND END TOTAL TIME:{etime - stime}")
    # Rank array no longer needed once all unions are done; free the memory.
    del u.rank
    print("START CLUSTERING")
    stime = time.time()
    count = 0
    addr_list = list()
    for index, cluster in enumerate(u.par):
        addr_list.append((str(index), u.find(cluster)))
        # Flush the batch every 10000 rows.
        # NOTE(review): this also fires on the very first row (count == 0),
        # committing a 1-row batch — looks like an off-by-one; confirm intent
        # before changing, as it alters transaction boundaries.
        if count % 10000 == 0:
            cdq.begin_transactions()
            cdq.insert_cluster_many(addr_list)
            cdq.commit_transactions()
            etime = time.time()
            print(
                f"COUNT {count} END, TOTAL TIME: {etime - stime}, {addr_list[len(addr_list)-1]}"
            )
            # Drop the flushed batch and start a fresh one.
            del addr_list
            addr_list = list()
        count += 1
    # Final partial batch (rows accumulated since the last flush).
    cdq.begin_transactions()
    cdq.insert_cluster_many(addr_list)
    cdq.commit_transactions()
    del addr_list
    etime = time.time()
    # Parent array fully consumed; free it.
    del u.par
    print(f"CLUSTERING END:{etime - stime}")
import sys sys.path.append('/home/dnlab/Jupyter-Bitcoin/Heuristics/ExperimentSpeed/') import pandas as pd from test_cluster_db_query import ClusterDB import test_db_query as dq import csv import time cdq = ClusterDB('/home/dnlab/DataSSD/dbv3cluster.db') df = pd.read_csv('/home/dnlab/DataHDD/tag_v2.csv') df = df[df['len'] > 500] clt_list = [] count = 0 for addr_group in df['group'].to_list(): s_time = time.time() addr_list = cdq.find_addr_from_cluster_num(addr_group) cluster_list = list(cdq.get_tag_from_addr(addr_list)) #if len(addr_list) > 10000: count += 1 cls_list.append(",".join(cluster_list)) if count % 10 == 0: print(count, cat_list[-1], time.time() - s_time) cat_list = [] count = 0 for addr_group in df['group'].to_list():