Ejemplo n.º 1
0
def analyze_graph_from_h5(filename, verbose=False):

    def v_print(vstr):
        if verbose:
            print vstr

    csr_mat = load_h5_to_csr(filename)
    dest, weight, firstEdge, outDegree = get_boruvka_format(csr_mat)
    del csr_mat



    n_e = dest.size
    n_v = firstEdge.size
    mem = (dest.size*dest.itemsize + weight.size*weight.itemsize + firstEdge.size*firstEdge.itemsize + outDegree.size*outDegree.itemsize)/ (1024.0**2)
    print "# edges:            ", n_e
    print "# vertices:         ", n_v
    print "size of graph (MB): ", mem

    times_cpu = list()
    times_gpu = list()
    equal_mst = list()
    equal_cost = list()
    mst_costs = {'cpu':list(), 'gpu':list()}
    t1, t2 = Timer(), Timer()

    for r in range(10):
        v_print('------ Round {} -------'.format(r))
        t1.reset()
        t1.tic()
        mst1, n_edges1 = boruvka_minho_seq(dest, weight, firstEdge, outDegree)
        t1.tac()
        v_print('CPU finished in {} s'.format(t1.elapsed))

        if n_edges1 < mst1.size:
            mst1 = mst1[:n_edges1]

        t2.reset()
        t2.tic()
        mst2, n_edges2 = boruvka_minho_gpu(dest, weight, firstEdge, outDegree, MAX_TPB=512)
        t2.tac()
        v_print('GPU finished in {} s'.format(t2.elapsed))


        if n_edges2 < mst2.size:
            mst2 = mst2[:n_edges2]


        mst_costs['cpu'].append(weight[mst1].sum())
        mst_costs['gpu'].append(weight[mst2].sum())
        equal_mst.append(np.in1d(mst1,mst2).all())
        equal_cost.append(weight[mst1].sum() == weight[mst2].sum())

        if r > 0:
            times_cpu.append(t1.elapsed)
            times_gpu.append(t2.elapsed)



    max_cost = max((max(mst_costs['cpu']), max(mst_costs['gpu'])))
    cost_error = map(lambda x: abs(x[0]-x[1]), zip(*mst_costs.values()))
    cost_error = map(lambda x: x/max_cost, cost_error)
    error_threshold = 1e-5

    cpu_str = ''
    for t in times_cpu:
        cpu_str += str(t) + ','

    gpu_str = ''
    for t in times_gpu:
        gpu_str += str(t) + ','

    cpu_costs = ''
    for c in mst_costs['cpu']:
        cpu_costs += str(c) + ','
    gpu_costs = ''
    for c in mst_costs['gpu']:
        gpu_costs += str(c) + ','    

    print 'dataset: {}'.format(os.path.basename(filename))
    print 'CPU times,{},{},{},{}'.format(n_e,n_v,mem,cpu_str[:-1])
    print 'GPU times,{},{},{},{}'.format(n_e,n_v,mem,gpu_str[:-1])
    print 'CPU costs,{},{},{},{}'.format(n_e,n_v,mem,cpu_costs[:-1])
    print 'GPU costs,{},{},{},{}'.format(n_e,n_v,mem,gpu_costs[:-1])    
    print ''
    print 'All equal MSTs: {}'.format(np.all(np.array(equal_mst) == equal_mst[0]))
    print 'All equal costs: {}'.format(np.all(equal_cost))
    print 'All cost errors <= {}: {}'.format(error_threshold, np.all(map(lambda x:x<error_threshold, cost_error)))
    print 'Max normalized error: {}'.format(max(cost_error))

    speedup = np.array(times_cpu) / np.array(times_gpu)

    print 'Times(s)\tMean\tStd\tMax\tMin'
    print 'CPU     \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(np.mean(times_cpu), np.std(times_cpu), np.max(times_cpu), np.min(times_cpu))
    print 'GPU     \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(np.mean(times_gpu), np.std(times_gpu), np.max(times_gpu), np.min(times_gpu))
    print 'SpeedUp \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(np.mean(speedup), np.std(speedup), np.max(speedup), np.min(speedup))
    print 'Error   \t{:.5F}\t{:.5F}\t{:.5F}\t{:.5F}'.format(np.mean(cost_error), np.std(cost_error), np.max(cost_error), np.min(cost_error))