def main(args):
    """Cluster every address from scratch using the one-time-change heuristic.

    Walks the chain in `term`-height windows, feeds each height to a worker
    pool running `one_time_change`, unions every returned address pair into a
    single union-find structure, and finally persists it via `db_write`.

    NOTE(review): this definition is shadowed by the later `main(args)` in
    this module (which maps `multi_input` instead of `one_time_change`);
    only the last definition is reachable from the __main__ guard.

    Args:
        args: parsed CLI namespace; only `args.dbpath` is used here.
    """
    term = 10000                         # heights processed per pool batch
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2
    cdq = ClusterDB(args.dbpath)
    stime = time.time()
    # One union-find slot per known address id (assumes dense ids from 0).
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        for sheight, eheight in zip(range(start_height, end_height, term),
                                    range(start_height + term, end_height + term, term)):
            if eheight >= end_height:
                # Last window: extend by one so range() covers end_height itself.
                eheight = end_height + 1
            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(one_time_change, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        # BUG FIX: removed leftover debug print that emitted
                        # one stdout line per address pair.
                        u.union(int(addr_1), int(addr_2))
        # Free the rank table before the memory-hungry DB write phase.
        del u.rank
        db_write(stime, cdq, u)
    except KeyboardInterrupt:
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()
def main():
    """Bulk-load the cluster table with every address known to `dq`.

    Addresses are copied over in fixed-size batches, each wrapped in its own
    transaction, with per-batch progress timing printed to stdout.

    NOTE(review): this zero-argument `main` conflicts with the other
    `main(args)` definitions in this module; only the last one defined wins.
    """
    batch = 10000
    end_addr = dq.get_max_address()
    print("max_addr:", end_addr)
    cdq = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    cdq.create_cluster_table()
    stime = time.time()
    offset = 0
    while offset < end_addr:
        cdq.begin_transactions()
        # Fetch one batch of addresses and stage them inside the transaction.
        cdq.insert_cluster_many(dq.get_addr_many(offset, offset + batch))
        etime = time.time()
        print('addr index: {}, time:{}'.format(offset, etime - stime))
        cdq.commit_transactions()
        offset += batch
def is_resume():
    """Return True when the cluster DB lags behind the source address DB.

    Compares the highest address id stored in the cluster DB against the
    highest address id known to the source DB; any difference means there
    is new work to resume.

    NOTE(review): relies on module-level `cdq` and `dq` handles; `cdq` is
    not created at module scope in this file — confirm the importer/caller
    sets it up before calling.
    """
    # Single-expression comparison replaces the if/return-False/return-True chain.
    return cdq.get_max_address() != dq.get_max_address()
def resume():
    """Resume clustering from the last height/address stored in the cluster DB.

    Builds a union-find over only the *new* addresses (ids shifted down by
    `s_index`), merges address pairs produced by `multi_input` per block
    height, then folds the resulting groups into the existing cluster table:
    an existing cluster number is reused when one is found, otherwise the
    freshly computed representative is inserted.
    """
    if not is_resume():
        return
    term = 10000
    start_height = cdq.get_max_height()
    # BUG FIX: was dq.gext_max_height() (typo) -> AttributeError at runtime.
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2
    s_index = cdq.get_max_address()          # first new (unclustered) address id
    u = uf.UnionFind(dq.get_max_address() - s_index + 1)
    # BUG FIX: stime was never assigned before being used in the progress print.
    stime = time.time()
    try:
        for sheight, eheight in zip(range(start_height, end_height, term),
                                    range(start_height + term, end_height + term, term)):
            if eheight >= end_height:
                # Last window: extend by one so range() covers end_height itself.
                eheight = end_height + 1
            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(multi_input, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        # Shift ids into the compact [0, max - s_index] space.
                        u.union(int(addr_1) - s_index, int(addr_2) - s_index)
            etime = time.time()
            print('height: {}, time:{}'.format(eheight, etime - stime))
        del u.rank   # free memory before the DataFrame phase
    except KeyboardInterrupt:
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()
    # Map every new address back to its (global) cluster representative.
    addr_list = [(str(index + s_index), u.find(cluster) + s_index)
                 for index, cluster in enumerate(u.par)]
    df = pd.DataFrame(addr_list, columns=['Address', 'ClusterNum'])
    # BUG FIX: was mi_df.groupby(...) but no `mi_df` exists anywhere in this
    # file; group the DataFrame built just above.
    for cluster_number, addr_group in df.groupby('ClusterNum'):
        if cluster_number == -1:
            continue   # -1 marks "unclustered"; nothing to merge
        addr_list = list(addr_group.Address)
        cluster_num_list = list(cdq.get_cluster_number(addr_list))
        if len(cluster_num_list) <= 1:
            # BUG FIX: `cluster_num` was read before assignment here; derive
            # the existing cluster number from the 0- or 1-element list.
            existing = cluster_num_list[0] if cluster_num_list else -1
            target = cluster_number if existing == -1 else existing
            # BUG FIX: insert_cluster_many was called unqualified; the method
            # lives on the ClusterDB handle (see the bulk-load main()).
            cdq.insert_cluster_many(list(zip(addr_list, [target] * len(addr_list))))
        else:
            cluster_num_list.sort()
            cluster_num = cluster_num_list.pop(0)
            if cluster_num == -1:
                # Skip the "unclustered" marker; take the smallest real id.
                cluster_num = cluster_num_list.pop(0)
            # TODO: if the same address already exists, update; otherwise insert.
            cdq.update_cluster_many(list(zip([cluster_num] * len(addr_list), addr_list)))


# Planning notes (original, Korean): incremental address-update design.
'''
지속적인 비트코인 주소를 업데이트하는 함수
1. 현재주소와, 최대주소 비교 (Meta Table 만드는것 추천)
2. 현재주소와 최대주소가 다르다면, Clustering 시작
3. start_height = Metatable.blk.+1 end_height = dq.get_max_height()
4. index = cur_addr
5. uf.UnionFind(max_addr - cur_addr + 1)
6. 아래와 유사
** u.union(int(addr_1) - index, int(addr_2) - index) ** ==> 함수 1 union
addr_list.append((str(index) + index, u.find(cluster)+index)) ==> 함수 2 Clustering
df = pd.DataFrame(addr_list)
for cluster_list groupby 해서 주소리스트를 가져옴: ==> dbwrite
- 주소들이 포함된 모든 클러스터 번호를 가져옴 만약 클러스터 번호가 없다면 그대로 add
- 클러스터 번호가 1개라면 그 번호로 클러스터 add
- 만약 클러스터 번호가 여러개라면 가장 작은것으로 add후 다른 클러스터가 있는것은 update
'''


def main(args):
    """Cluster every address from scratch using the `multi_input` heuristic.

    Walks the chain in `term`-height windows, feeds each height to a worker
    pool running `multi_input`, unions every returned address pair into a
    single union-find structure, and finally persists it via `db_write`.

    Args:
        args: parsed CLI namespace; only `args.dbpath` is used here.
    """
    term = 10000                         # heights processed per pool batch
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2
    cdq = ClusterDB(args.dbpath)
    stime = time.time()
    # One union-find slot per known address id (assumes dense ids from 0).
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        for sheight, eheight in zip(range(start_height, end_height, term),
                                    range(start_height + term, end_height + term, term)):
            if eheight >= end_height:
                # Last window: extend by one so range() covers end_height itself.
                eheight = end_height + 1
            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(multi_input, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        u.union(int(addr_1), int(addr_2))
            etime = time.time()
            print('height: {}, time:{}'.format(eheight, etime - stime))
        # Free the rank table before the memory-hungry DB write phase.
        del u.rank
        db_write(stime, cdq, u)
    except KeyboardInterrupt:
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Heuristics Clusterings')
    parser.add_argument('--dbpath', '-d', type=str, required=True,
                        help='insert make dbpath')
    # BUG FIX: argparse `type=bool` converts any non-empty string to True
    # (bool('False') is True); use store_true so the bare flag works.
    parser.add_argument('--resume', '-r', action='store_true', default=False,
                        help='execute resume')
    args = parser.parse_args()
    # BUG FIX: the parsed --resume flag was never consulted.
    if args.resume:
        resume()
    else:
        main(args)