Example #1
0
def main(args):
    # Build address clusters with union-find: scan block heights in chunks
    # of `term`, extract one-time-change address pairs in parallel worker
    # processes, union each pair, then persist the result to the cluster DB.
    # Relies on module-level `dq` (chain DB), `uf` (union-find module),
    # `one_time_change`, and `db_write` — defined elsewhere in this project.

    term = 10000  # number of block heights processed per chunk
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2  # use half the cores
    cdq = ClusterDB(args.dbpath)

    stime = time.time()
    # One union-find slot per address id (ids assumed dense — TODO confirm).
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        # Walk [start, end) in windows of `term`: (1, 10001), (10001, 20001), ...
        for sheight, eheight in zip(range(start_height, end_height, term), \
                                    range(start_height+term, end_height+term, term)):
            if eheight >= end_height:
                # Final window: extend by one so end_height itself is included
                # in the half-open range() below.
                eheight = end_height + 1

            with multiprocessing.Pool(pool_num) as p:
                # imap yields per-height results in order while workers
                # process heights in parallel.
                result = p.imap(one_time_change, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        print(addr_1, ',', addr_2)
                        u.union(int(addr_1), int(addr_2))
            etime = time.time()
            #print('height: {}, time:{}'.format(eheight, etime-stime))
        # The rank table is only needed while unioning; free it before the
        # final DB write pass.
        del u.rank
        db_write(stime, cdq, u)

    except KeyboardInterrupt:
        # Commit whatever was already written instead of losing the work.
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()
Example #2
0
def main():
    """Copy every address row from the chain DB into the cluster DB,
    10000 addresses per transaction, logging elapsed time per batch."""
    batch = 10000
    max_addr = dq.get_max_address()
    print("max_addr:", max_addr)
    cluster_db = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    cluster_db.create_cluster_table()
    started = time.time()
    lower = 0
    while lower < max_addr:
        cluster_db.begin_transactions()
        rows = dq.get_addr_many(lower, lower + batch)
        cluster_db.insert_cluster_many(rows)
        print('addr index: {}, time:{}'.format(lower, time.time() - started))
        cluster_db.commit_transactions()
        lower += batch
def main(args):
    """Run union-find over CSV address pairs and dump the resulting
    (addr, cluster-number) mapping to a CSV file via pandas."""
    t0 = time.time()
    pairs = read_csv(args.csv_file)
    t1 = time.time()
    cdq = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    print("DEBUG:", pairs[0], t1 - t0)

    print("START UNION FIND")
    t0 = time.time()
    # Fixed 200M capacity (sizing from cdq.get_max_address() was disabled).
    u = uf.UnionFind(200000000)
    print(f"MAKE ADDRESS END, TOTAL TIME:{time.time() - t0}")

    # Union every pair, logging progress every 10M rows (fires at row 0 too).
    for n, (left, right) in enumerate(pairs):
        u.union(left, right)
        if n % 10000000 == 0:
            print(f"COUNT {n} END, TOTAL TIME: {time.time() - t0}")
    print(f"UNION FIND END TOTAL TIME:{time.time() - t0}")

    # Rank array is only needed while unioning; drop it before the scan.
    del u.rank
    print("START CLUSTERING")
    t0 = time.time()
    # Resolve each address index to its final cluster representative.
    mapping = [(str(idx), u.find(root)) for idx, root in enumerate(u.par)]
    del u.par
    frame = pd.DataFrame(
        mapping,
        columns=['addr', 'number'],
    )
    frame.to_csv('/home/dnlab/DataHDD/cluster_result/40man.csv', index=False)
    print(f"CLUSTERING END:{time.time() - t0}")
def main(args):
    # Union-find over CSV address pairs, then persist the resulting
    # (address-index, cluster-root) rows into a SQLite cluster DB in
    # batches of 10000 rows per transaction.
    count = 0
    stime = time.time()
    csv_list = read_csv(args.csv_file)
    etime = time.time()
    cdq = ClusterDB('/home/dnlab/DataHDD/database/multi-input25man.db')
    cdq.create_cluster_table()
    print("DEBUG:", csv_list[0], etime - stime)

    print("START UNION FIND")
    stime = time.time()
    #u = uf.UnionFind(int(dq.get_max_address())+1)
    # Fixed capacity: 90M slots, presumably an address-count upper bound
    # for this dataset — TODO confirm against the source DB.
    u = uf.UnionFind(90000000)
    etime = time.time()
    print(f"MAKE ADDRESS END, TOTAL TIME:{etime - stime}")

    for first, second in csv_list:
        u.union(first, second)
        # Progress log every 10M unions (also fires on the very first row).
        if count % 10000000 == 0:
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}")
        count += 1
    etime = time.time()
    print(f"UNION FIND END TOTAL TIME:{etime - stime}")

    # Rank array is only needed during union; free it before the big scan.
    del u.rank
    print("START CLUSTERING")
    stime = time.time()
    count = 0
    addr_list = list()
    for index, cluster in enumerate(u.par):
        # Map each address index to its final cluster representative.
        addr_list.append((str(index), u.find(cluster)))
        # Flush a batch every 10000 rows; note the first flush happens at
        # count == 0 and therefore writes a single-row batch.
        if count % 10000 == 0:
            cdq.begin_transactions()
            cdq.insert_cluster_many(addr_list)
            cdq.commit_transactions()
            etime = time.time()
            print(
                f"COUNT {count} END, TOTAL TIME: {etime - stime}, {addr_list[len(addr_list)-1]}"
            )
            del addr_list
            addr_list = list()
        count += 1

    # Write whatever remains in the last partial batch.
    cdq.begin_transactions()
    cdq.insert_cluster_many(addr_list)
    cdq.commit_transactions()
    del addr_list

    etime = time.time()
    del u.par

    print(f"CLUSTERING END:{etime - stime}")
Example #5
0
import sys

sys.path.append('/home/dnlab/Jupyter-Bitcoin/Heuristics/ExperimentSpeed/')
import pandas as pd
from test_cluster_db_query import ClusterDB
import test_db_query as dq
import csv
import time

cdq = ClusterDB('/home/dnlab/DataSSD/dbv3cluster.db')

df = pd.read_csv('/home/dnlab/DataHDD/tag_v2.csv')
# Keep only clusters with more than 500 addresses.
df = df[df['len'] > 500]

# For each large cluster, collect the tags of all its addresses as one
# comma-joined string, logging progress every 10 clusters.
clt_list = []
count = 0
for addr_group in df['group'].to_list():

    s_time = time.time()
    addr_list = cdq.find_addr_from_cluster_num(addr_group)
    cluster_list = list(cdq.get_tag_from_addr(addr_list))

    #if len(addr_list) > 10000:
    count += 1
    # BUG FIX: this loop appended to the undefined name `cls_list` and
    # printed `cat_list[-1]` before `cat_list` existed, raising NameError
    # at runtime; both now use the `clt_list` declared above.
    clt_list.append(",".join(cluster_list))
    if count % 10 == 0:
        print(count, clt_list[-1], time.time() - s_time)

cat_list = []
count = 0
for addr_group in df['group'].to_list():