Example #1
import multiprocessing
import time

# `dq` (chain/address queries), `uf` (union-find), `ClusterDB` (cluster DB
# wrapper), and the pool worker functions are project-local names assumed to
# be imported or defined at module level in the original source.

def main(args):
    term = 10000                                  # block heights per batch
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2   # use half the cores
    cdq = ClusterDB(args.dbpath)

    stime = time.time()
    # One union-find slot per known address ID.
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        # Walk the chain in [sheight, eheight) windows of `term` blocks.
        for sheight, eheight in zip(range(start_height, end_height, term),
                                    range(start_height + term, end_height + term, term)):
            if eheight >= end_height:
                eheight = end_height + 1          # make the last window inclusive

            with multiprocessing.Pool(pool_num) as p:
                # Each worker returns the address pairs found in one block.
                result = p.imap(one_time_change, range(sheight, eheight))
                for addr_list in result:
                    for addr_1, addr_2 in addr_list:
                        print(addr_1, ',', addr_2)
                        u.union(int(addr_1), int(addr_2))
            etime = time.time()
            print('height: {}, time: {}'.format(eheight, etime - stime))
        del u.rank    # rank is only needed during union(); free the memory
        db_write(stime, cdq, u)    # flush (address, cluster) pairs to the DB

    except KeyboardInterrupt:
        print('Keyboard interrupt detected! Committing transactions...')
        cdq.commit_transactions()
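
Both examples lean on a uf.UnionFind class whose implementation is not shown;
the code reads u.par, deletes u.rank, and calls union() and find(). A minimal
sketch consistent with that usage (an assumption, not the original class):

class UnionFind:
    def __init__(self, n):
        self.par = list(range(n))   # par[i] is the parent of element i
        self.rank = [0] * n         # rank bounds each tree's height

    def find(self, x):
        # Path halving: point x at its grandparent while walking to the root.
        while self.par[x] != x:
            self.par[x] = self.par[self.par[x]]
            x = self.par[x]
        return x

    def union(self, x, y):
        rx, ry = self.find(x), self.find(y)
        if rx == ry:
            return
        if self.rank[rx] < self.rank[ry]:   # union by rank keeps trees shallow
            rx, ry = ry, rx
        self.par[ry] = rx
        if self.rank[rx] == self.rank[ry]:
            self.rank[rx] += 1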
Example #2
import multiprocessing
import time

import pandas as pd

# As in Example #1, `dq`, `uf`, `ClusterDB`, and the pool workers are assumed
# to be project-local, module-level names.

def main():
    term = 10000                       # addresses per batch
    start_addr = 0
    end_addr = dq.get_max_address()
    print("max_addr:", end_addr)
    cdq = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    cdq.create_cluster_table()
    stime = time.time()
    for i in range(start_addr, end_addr, term):
        cdq.begin_transactions()
        addr_list = dq.get_addr_many(i, i + term)   # one batch of addresses
        cdq.insert_cluster_many(addr_list)
        etime = time.time()
        print('addr index: {}, time: {}'.format(i, etime - stime))
        cdq.commit_transactions()
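
# ClusterDB is a project-local wrapper whose source is not shown. A minimal
# sqlite3-backed sketch of a few of the methods these examples call (the
# schema, SQL, and row shapes are assumptions):
import sqlite3

class ClusterDBSketch:
    def __init__(self, dbpath):
        # isolation_level=None gives autocommit, so BEGIN/COMMIT are explicit.
        self.conn = sqlite3.connect(dbpath, isolation_level=None)

    def create_cluster_table(self):
        self.conn.execute('CREATE TABLE IF NOT EXISTS Cluster '
                          '(Address TEXT PRIMARY KEY, ClusterNum INTEGER)')

    def begin_transactions(self):
        self.conn.execute('BEGIN')

    def insert_cluster_many(self, rows):
        # rows: iterable of (address, cluster_number) pairs
        self.conn.executemany('INSERT OR IGNORE INTO Cluster VALUES (?, ?)', rows)

    def update_cluster_many(self, rows):
        # rows: iterable of (cluster_number, address) pairs
        self.conn.executemany('UPDATE Cluster SET ClusterNum = ? '
                              'WHERE Address = ?', rows)

    def commit_transactions(self):
        self.conn.commit()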
def is_resume():
    # Resume is needed whenever the cluster DB lags behind the source DB.
    return cdq.get_max_address() != dq.get_max_address()
def resume():
    if not is_resume():
        return

    term = 10000
    start_height = cdq.get_max_height() + 1       # continue after the last clustered block
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2
    s_index = cdq.get_max_address()               # offset: only new addresses get slots
    u = uf.UnionFind(dq.get_max_address() - s_index + 1)
    stime = time.time()
    try:
        for sheight, eheight in zip(range(start_height, end_height, term),
                                    range(start_height + term, end_height + term, term)):
            if eheight >= end_height:
                eheight = end_height + 1

            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(multi_input, range(sheight, eheight))
                for addr_list in result:
                    for addr_1, addr_2 in addr_list:
                        # Shift by s_index so new addresses start at slot 0.
                        u.union(int(addr_1) - s_index, int(addr_2) - s_index)
            etime = time.time()
            print('height: {}, time: {}'.format(eheight, etime - stime))
        del u.rank

    except KeyboardInterrupt:
        print('Keyboard interrupt detected! Committing transactions...')
        cdq.commit_transactions()
        
    # Map every new address to its cluster root (shifted back to real IDs).
    addr_list = []
    for index, cluster in enumerate(u.par):
        addr_list.append((str(index + s_index), u.find(cluster) + s_index))
    df = pd.DataFrame(addr_list, columns=['Address', 'ClusterNum'])
    mi_group = df.groupby('ClusterNum')

    for cluster_number, addr_group in mi_group:
        if cluster_number != -1:
            addr_list = list(addr_group.Address)
            cluster_num_list = list(cdq.get_cluster_number(addr_list))
            if len(cluster_num_list) <= 1:
                # At most one existing cluster: reuse it, or fall back to the
                # freshly computed cluster_number.
                cluster_num = cluster_num_list[0] if cluster_num_list else -1
                if cluster_num == -1:
                    cdq.insert_cluster_many(list(zip(addr_list, [cluster_number] * len(addr_list))))
                else:
                    cdq.insert_cluster_many(list(zip(addr_list, [cluster_num] * len(addr_list))))
            else:
                # Multiple existing clusters: merge into the smallest valid one.
                cluster_num_list.sort()
                cluster_num = cluster_num_list.pop(0)
                if cluster_num == -1:
                    cluster_num = cluster_num_list.pop(0)
                # TODO: if the same address already exists, update; otherwise insert.
                cdq.update_cluster_many(list(zip([cluster_num] * len(addr_list), addr_list)))
    '''
    Notes for a function that continuously updates Bitcoin addresses:
    1. Compare the current address with the maximum address (building a
       meta table is recommended).
    2. If the current and maximum addresses differ, start clustering.
    3. start_height = meta_table.blk + 1
       end_height = dq.get_max_height()
    4. index = cur_addr
    5. uf.UnionFind(max_addr - cur_addr + 1)
    6. Similar to the code below:
       ** u.union(int(addr_1) - index, int(addr_2) - index) **  ==> function 1: union
          addr_list.append((str(index) + index, u.find(cluster) + index))  ==> function 2: clustering
          df = pd.DataFrame(addr_list)
          group by cluster and fetch each cluster's address list:  ==> db write
            - Fetch every cluster number those addresses belong to.
              If there is no cluster number, add as-is.
            - If there is exactly one cluster number, add under that number.
            - If there are several cluster numbers, add under the smallest,
              then update the rows that point at the other clusters.
    '''
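
# `db_write` is called by main() below but not shown in these examples.
# A hedged sketch following the write procedure described in the notes
# above (signature from the call sites; body and batch size are assumed):
def db_write(stime, cdq, u, batch=10000):
    rows = []
    for index in range(len(u.par)):
        # Pair each address ID with the root of its union-find tree.
        rows.append((str(index), u.find(index)))
        if len(rows) >= batch:
            cdq.begin_transactions()
            cdq.insert_cluster_many(rows)
            cdq.commit_transactions()
            print('written: {}, time: {}'.format(index, time.time() - stime))
            rows = []
    if rows:    # flush the final partial batch
        cdq.begin_transactions()
        cdq.insert_cluster_many(rows)
        cdq.commit_transactions()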
    
    
def main(args):
    # Full clustering run using the multi-input heuristic worker.
    term = 10000
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2
    cdq = ClusterDB(args.dbpath)

    stime = time.time()
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        for sheight, eheight in zip(range(start_height, end_height, term),
                                    range(start_height + term, end_height + term, term)):
            if eheight >= end_height:
                eheight = end_height + 1

            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(multi_input, range(sheight, eheight))
                for addr_list in result:
                    for addr_1, addr_2 in addr_list:
                        u.union(int(addr_1), int(addr_2))
            etime = time.time()
            print('height: {}, time: {}'.format(eheight, etime - stime))
        del u.rank    # rank is only needed during union(); free the memory
        db_write(stime, cdq, u)

    except KeyboardInterrupt:
        print('Keyboard interrupt detected! Committing transactions...')
        cdq.commit_transactions()
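
# The pool workers (`one_time_change`, `multi_input`) are not shown. They must
# be module-level functions (so multiprocessing can pickle them) that take a
# block height and return pairs of addresses to merge. A hedged sketch of the
# multi-input heuristic, assuming a hypothetical dq.get_tx_input_addrs() query:
def multi_input(height):
    pairs = []
    for tx_inputs in dq.get_tx_input_addrs(height):   # input addresses per tx
        # Multi-input heuristic: all inputs of one transaction are assumed to
        # share an owner, so pair each input with the first one.
        pairs.extend((tx_inputs[0], other) for other in tx_inputs[1:])
    return pairs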

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Heuristics Clusterings')
    parser.add_argument('--dbpath', '-d', type=str,
                        required=True,
                        help='path to the cluster database file')
    # Note: type=bool is an argparse pitfall (bool('False') is True), so the
    # flag is declared with action='store_true' instead.
    parser.add_argument('--resume', '-r', action='store_true',
                        help='resume clustering instead of starting over')

    args = parser.parse_args()
    if args.resume:
        resume()
    else:
        main(args)
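
Assuming the script is saved as cluster.py (the filename is not given in the
source), a full run and a resumed run would look like:

python cluster.py --dbpath /path/to/dbv3cluster.db
python cluster.py --dbpath /path/to/dbv3cluster.db --resume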