Ejemplo n.º 1
0
def bench_from_log():
    """Run ``bench_gemm`` on every tensor shape listed in a size log.

    The log file, logged batch size and target batch size are hard-coded.
    For each workload entry the first dimension is scaled by
    ``target_bs // bs`` semantics (``w[0] * target_bs // bs``), the shape is
    passed to ``bench_gemm`` and the result is printed.  A summary over all
    shapes is printed at the end.
    """
    # Alternative inputs previously used with bs=1, target_bs=1:
    #   './logs/resnet50-matrixsize-A.log'
    #   './logs/resnet34-matrixsize.log'
    logfile = './logs/resnet50-matrixsize-ag.log'
    bs = 8
    target_bs = 32
    num_iters = 50

    # One (element count, bench_gemm result) pair per workload, in log order.
    records = []
    for shape in reader.read_tensor_sizes(logfile):
        rows = shape[0] * target_bs // bs
        cols = shape[1]
        # Presumably the measured time of the GEMM; verify against bench_gemm.
        elapsed = bench_gemm(rows, cols, num_iters)
        records.append((rows * cols, elapsed))
        print('(%d,%d),%f' % (rows, cols, elapsed))

    total_sizes = [count for count, _ in records]
    total_time = [elapsed for _, elapsed in records]

    print('Log file: ', logfile)
    print('# of Tensors: ', len(total_sizes))
    print('Total size: ', np.sum(total_sizes))
    print('Total time: ', np.sum(total_time))
    print('Max-min-mean-std: ',
          np.max(total_time),
          np.min(total_time),
          np.mean(total_time),
          np.std(total_time))
Ejemplo n.º 2
0
 def _plot_dnn_tensor(dnn):
     """Scatter-plot how often each tensor size occurs for one DNN.

     Reads the matrix-size log for ``dnn``, converts every entry's first
     dimension ``n`` to ``n * (n + 1) // 2``, prints min/max/mean of those
     sizes and plots (size * 4, occurrence count) on ``ax``.

     ``ax``, ``DNN_COLORS``, ``DNN_MARKERS`` and ``STANDARD_TITLES`` are
     taken from the enclosing/global scope and must be keyed by ``dnn``.
     """
     fn = '/Users/lele/shared-server/kfac-logs/%s-matrixsize.log' % (dnn)
     sizes = reader.read_tensor_sizes(fn)
     # Presumably the number of unique entries of a symmetric n x n matrix
     # (upper triangle including the diagonal) -- confirm with the caller.
     sizes = [s[0] * (s[0] + 1) // 2 for s in sizes]
     print(
         'dnn: ', dnn, ', min: %d, max: %d, mean: %f' %
         (np.min(sizes), np.max(sizes), np.mean(sizes)))

     # Histogram: size -> number of tensors with that size.
     counter_dict = {}
     for s in sizes:
         counter_dict[s] = counter_dict.get(s, 0) + 1
     keys = sorted(counter_dict)
     print(dnn, 'sizes: ', keys)
     x_pos = list(range(len(keys)))
     counters = [counter_dict[k] for k in keys]

     # NOTE(review): x values are scaled by 4 (presumably bytes per fp32
     # element) while the axis label below speaks of elements -- confirm.
     ax.scatter(np.array(keys) * 4,
                counters,
                color=DNN_COLORS[dnn],
                marker=DNN_MARKERS[dnn],
                facecolors='none',
                linewidth=1,
                label=STANDARD_TITLES[dnn])
     ax.set_xlabel('Tensor size (# of communicated elements)')
     ax.set_ylabel('Count')

     threshold = 128
     # Index of the first size above the threshold.  Defaults to len(keys) so
     # that, when no size exceeds the threshold, every tensor is counted
     # (previously idx stayed 0 and the count was wrongly reported as 0).
     idx = next((i for i, s in enumerate(keys) if s > threshold), len(keys))
     # Number of tensors whose size is <= threshold.
     thres_count = np.sum(counters[0:idx])
Ejemplo n.º 3
0
def bench_customize_comm():
    """Benchmark ``tcmm.Communicator.multiBcast`` on logged tensor sizes.

    Initializes Horovod, pins the process to its local CUDA device, builds
    one square matrix (plus a zero-filled output of the same shape) per
    workload entry of a hard-coded log file, then runs warm-up rounds
    followed by timed rounds of ``multiBcast`` and prints the average
    wall-clock time per timed round.
    """
    import horovod.torch as hvd
    torch.random.manual_seed(10)
    hvd.init()
    rank = hvd.rank()
    local_rank = hvd.local_rank()
    size = hvd.size()
    torch.cuda.set_device(local_rank)

    logfile = './logs/resnet50-matrixsize-A.log'
    workloads = reader.read_tensor_sizes(logfile)
    tensors = []
    outputs = []
    for w in workloads:
        n = w[0]  # assumes w[0] is an integer dimension -- TODO confirm
        a = torch.rand(n).float().cuda()
        a = a.view(-1, a.size(-1))  # 1-D vector -> single-row 2-D tensor
        A = a.t() @ (a)  # outer product of the row with itself: n x n
        tensors.append(A)
        outputs.append(A.new_zeros(A.shape))

    # Create the communicator exactly once.  It used to be constructed inside
    # the loop above, which built a new communicator per tensor (only the
    # last one was used) and left the name undefined for an empty workload.
    communicator = tcmm.Communicator(rank, size)

    warmup = 5
    niters = 10
    for _ in range(warmup):
        communicator.multiBcast(tensors, outputs, compute_eigen)
        communicator.synchronize()
    # Drain all queued GPU work so it is not attributed to the timed section.
    torch.cuda.synchronize()

    stime = time.time()
    for _ in range(niters):
        communicator.multiBcast(tensors, outputs, compute_eigen)
        communicator.synchronize()
        torch.cuda.synchronize()
    etime = time.time()
    print('Avg time: ', (etime - stime) / niters)