Example #1
def main(args):
    dataname = os.path.basename(args.dataset)
    g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
    labels = data.get_labels(args.dataset)
    n_classes = len(np.unique(labels))
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    train_nid = np.nonzero(train_mask)[0].astype(np.int64)
    vnum = train_mask.shape[0]

    num_hops = args.n_layers if args.preprocess else args.n_layers + 1

    for epoch in range(args.n_epochs):
        # count how often each vertex is touched by sampling in this epoch
        freq = np.zeros(vnum, dtype=np.int64)
        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.batch_size,
                                                       args.num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=16,
                                                       num_hops=num_hops,
                                                       seed_nodes=train_nid,
                                                       prefetch=False):
            count_vertex_freq(nf, freq)
        hit_rate = optimal_cache_hit(freq, 0.2)
        print('Oracle cache hit rate: ', hit_rate)
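
Example #1 calls two helpers that are not shown: count_vertex_freq, which accumulates how often each parent-graph vertex appears in the sampled NodeFlows, and optimal_cache_hit, which estimates the hit rate of an oracle cache holding the hottest fraction of vertices. A minimal sketch of what they might look like, assuming freq is indexed by parent-graph vertex id (the bodies are assumptions inferred from the call sites):

import numpy as np

def count_vertex_freq(nf, freq):
    # accumulate, over every NodeFlow layer, how often each
    # parent-graph vertex is touched by sampling
    for i in range(nf.num_layers):
        nids = nf.layer_parent_nid(i).numpy()
        np.add.at(freq, nids, 1)

def optimal_cache_hit(freq, cache_ratio):
    # oracle cache: keep the hottest cache_ratio fraction of vertices;
    # the hit rate is the share of all accesses that land on them
    total = freq.sum()
    if total == 0:
        return 0.0
    cache_size = int(freq.shape[0] * cache_ratio)
    hottest = np.sort(freq)[::-1][:cache_size]
    return float(hottest.sum()) / float(total)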
Example #2
def main(args):

    dataname = os.path.basename(args.dataset)
    g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
    labels = data.get_labels(args.dataset)
    n_classes = len(np.unique(labels))
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    test_nid = np.nonzero(test_mask)[0].astype(np.int64)

    labels = torch.LongTensor(labels)

    # build an inference-only model for the selected architecture
    if args.arch == 'gcn-nssc':
        from PaGraph.model.gcn_nssc import GCNSampling, GCNInfer
        infer_model = GCNInfer(args.feat_size, 32, n_classes, args.n_layers,
                               F.relu, args.preprocess)
    elif args.arch == 'gs-nssc':
        from PaGraph.model.graphsage_nssc import GraphSageSampling
        infer_model = GraphSageSampling(args.feat_size, 16, n_classes,
                                        args.n_layers, F.relu, 0, 'mean',
                                        args.preprocess)
    else:
        print('Unknown arch')
        sys.exit(-1)

    # run inference on the test vertices
    gnneval(args, infer_model, g, labels, 0, test_nid)
Example #3
def main(args):
    dataname = os.path.basename(args.dataset)
    g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
    labels = data.get_labels(args.dataset)
    n_classes = len(np.unique(labels))
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    train_nid = np.nonzero(train_mask)[0].astype(np.int64)

    num_hops = args.n_layers if args.preprocess else args.n_layers + 1

    for epoch in range(args.n_epochs):
        # total number of vertices pulled from the graph store this epoch
        epoch_load_vnum = 0
        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.batch_size,
                                                       args.num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=16,
                                                       num_hops=num_hops,
                                                       seed_nodes=train_nid,
                                                       prefetch=False):
            epoch_load_vnum += count_nf_vnum(nf)
        print('Epoch loaded vertex#: ', epoch_load_vnum)
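
Example #3 relies on count_nf_vnum, which is also not shown. Consistent with the printed 'Epoch loaded vertex#' metric, a plausible minimal sketch (only the name and call site come from the example; the body is an assumption):

def count_nf_vnum(nf):
    # number of parent-graph vertices this NodeFlow pulls in,
    # summed over all of its layers
    return sum(nf.layer_size(i) for i in range(nf.num_layers))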
Example #4
                        type=int,
                        default=2,
                        help="num of partitions")
    parser.add_argument("--num-hops",
                        type=int,
                        default=1,
                        help="num of hop neighbors required for a batch")
    parser.add_argument("--ordering", dest='ordering', action='store_true')
    parser.set_defaults(ordering=False)
    args = parser.parse_args()

    # get data
    adj = spsp.load_npz(os.path.join(args.dataset, 'adj.npz'))
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    train_nids = np.nonzero(train_mask)[0].astype(np.int64)
    labels = data.get_labels(args.dataset)

    # ordering
    if args.ordering:
        print('re-ordering graphs...')
        adj = adj.tocsc()
        adj, vmap = ordering.reordering(
            adj, depth=args.num_hops)  # vmap: orig -> new
        # save to files
        mapv = np.zeros(vmap.shape, dtype=np.int64)
        mapv[vmap] = np.arange(vmap.shape[0])  # mapv: new -> orig
        train_nids = np.sort(vmap[train_nids])
        spsp.save_npz(os.path.join(args.dataset, 'adj.npz'), adj)
        np.save(os.path.join(args.dataset, 'labels.npy'), labels[mapv])
        np.save(os.path.join(args.dataset, 'train.npy'), train_mask[mapv])
        np.save(os.path.join(args.dataset, 'val.npy'), val_mask[mapv])
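
The vmap/mapv pair in Example #4 is an inverse-permutation pattern: vmap maps an original vertex id to its new id after reordering, and mapv undoes that, so indexing an array with mapv lays it out in the new order. A small self-contained check of the relation (made-up data; the names mirror the example):

import numpy as np

vmap = np.array([2, 0, 3, 1])            # orig -> new
mapv = np.zeros_like(vmap)
mapv[vmap] = np.arange(vmap.shape[0])    # new -> orig

labels = np.array([10, 11, 12, 13])      # indexed by original id
reordered = labels[mapv]                 # indexed by new id
assert reordered[vmap[2]] == labels[2]   # vertex 2 keeps its label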
Example #5
def trainer(rank, world_size, args, backend='nccl'):
    # init multi process
    init_process(rank, world_size, backend)

    # load data
    dataname = os.path.basename(args.dataset)
    g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
    labels = data.get_labels(args.dataset)
    n_classes = len(np.unique(labels))
    # masks for semi-supervised learning
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    train_nid = np.nonzero(train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(test_mask)[0].astype(np.int64)
    chunk_size = int(train_nid.shape[0] / world_size) - 1
    train_nid = train_nid[chunk_size * rank:chunk_size * (rank + 1)]
    # to torch tensor
    labels = torch.LongTensor(labels)
    train_mask = torch.ByteTensor(train_mask)
    val_mask = torch.ByteTensor(val_mask)
    test_mask = torch.ByteTensor(test_mask)

    # prepare model
    num_hops = args.n_layers if args.preprocess else args.n_layers + 1
    model = GCNSampling(args.feat_size, args.n_hidden, n_classes,
                        args.n_layers, F.relu, args.dropout, args.preprocess)
    infer_model = GCNInfer(args.feat_size, args.n_hidden, n_classes,
                           args.n_layers, F.relu)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    model.cuda(rank)
    infer_model.cuda(rank)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    ctx = torch.device(rank)

    # start training
    epoch_dur = []
    # socket used for the cross-trainer barriers below
    sock = PaGraph.utils.trainer(port=8200 + rank)
    port = 8760
    # receive pre-sampled NodeFlows from a remote sampler process
    sampler = dgl.contrib.sampling.SamplerReceiver(g,
                                                   '127.0.0.1:' +
                                                   str(port + rank),
                                                   1,
                                                   net_type='socket')
    with torch.autograd.profiler.profile(enabled=(rank == 0),
                                         use_cuda=True) as prof:
        for epoch in range(args.n_epochs):
            model.train()
            epoch_start_time = time.time()
            step = 0
            for nf in sampler:
                with torch.autograd.profiler.record_function('gpu-load'):
                    nf.copy_from_parent(ctx=ctx)
                    batch_nids = nf.layer_parent_nid(-1)
                    label = labels[batch_nids]
                    label = label.cuda(rank, non_blocking=True)
                with torch.autograd.profiler.record_function('gpu-compute'):
                    pred = model(nf)
                    loss = loss_fcn(pred, label)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                step += 1
                if rank == 0 and step % 20 == 0:
                    print('epoch [{}] step [{}]. Loss: {:.4f}'.format(
                        epoch + 1, step, loss.item()))
                if step % 100 == 0:
                    PaGraph.utils.barrier(sock, role='trainer')
            if rank == 0:
                epoch_dur.append(time.time() - epoch_start_time)
                print('Epoch average time: {:.4f}'.format(
                    np.mean(np.array(epoch_dur[2:]))))
            # epoch barrier
            PaGraph.utils.barrier(sock, role='trainer')
    if rank == 0:
        print(prof.key_averages().table(sort_by='cuda_time_total'))
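
Examples #5-#8 all start by calling init_process, which is not shown. A minimal sketch of the torch.distributed setup it presumably wraps (the master address and port are placeholders, not values from the source):

import os
import torch
import torch.distributed as dist

def init_process(rank, world_size, backend='nccl'):
    # one process per GPU: bind this rank to its own device and join the
    # process group used by DistributedDataParallel
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29500')
    torch.cuda.set_device(rank)
    dist.init_process_group(backend, rank=rank, world_size=world_size)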
Example #6
def trainer(rank, world_size, args, backend='nccl'):
  # init multi process
  init_process(rank, world_size, backend)
  
  # load data
  dataname = os.path.basename(args.dataset)
  g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
  labels = data.get_labels(args.dataset)
  n_classes = args.n_classes
  # masks for semi-supervised learning
  train_mask, val_mask, test_mask = data.get_masks(args.dataset)
  train_nid = np.nonzero(train_mask)[0].astype(np.int64)
  chunk_size = int(train_nid.shape[0] / world_size) - 1
  train_nid = train_nid[chunk_size * rank:chunk_size * (rank + 1)]
  test_nid = np.nonzero(test_mask)[0].astype(np.int64)
  # to torch tensor
  labels = torch.LongTensor(labels)
  train_mask = torch.ByteTensor(train_mask)
  val_mask = torch.ByteTensor(val_mask)
  test_mask = torch.ByteTensor(test_mask)

  # cacher
  embed_names = ['features', 'norm']
  vnum = train_mask.size(0)
  maps = torch.arange(vnum)
  cacher = storage.GraphCacheServer(g, vnum, maps, rank)
  cacher.init_field(embed_names)
  cacher.log = False

  # prepare model
  model = GCNSampling(args.feat_size,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.preprocess)
  loss_fcn = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
  model.cuda(rank)
  model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

  # sampler
  num_hops = args.n_layers if args.preprocess else args.n_layers + 1
  if args.remote_sample:
    sampler = SampleLoader(g, rank, one2all=False)
  else:
    sampler = dgl.contrib.sampling.NeighborSampler(
      g, args.batch_size,
      args.num_neighbors, neighbor_type='in',
      shuffle=True, num_workers=16, num_hops=num_hops,
      seed_nodes=train_nid, prefetch=True)

  # start training
  epoch_dur = []
  tic = time.time()
  with torch.autograd.profiler.profile(enabled=(rank==0), use_cuda=True) as prof:
    for epoch in range(args.n_epochs):
      model.train()
      epoch_start_time = time.time()
      step = 0
      for nf in sampler:
        with torch.autograd.profiler.record_function('gpu-load'):
          cacher.fetch_data(nf)
          batch_nids = nf.layer_parent_nid(-1)
          label = labels[batch_nids]
          label = label.cuda(rank, non_blocking=True)
        with torch.autograd.profiler.record_function('gpu-compute'):
          pred = model(nf)
          loss = loss_fcn(pred, label)
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
        step += 1
        if epoch == 0 and step == 1:
          # after the first mini-batch, populate the GPU feature cache
          cacher.auto_cache(g, embed_names)
        if rank == 0 and step % 20 == 0:
          print('epoch [{}] step [{}]. Loss: {:.4f}'
                .format(epoch + 1, step, loss.item()))
      if rank == 0:
        epoch_dur.append(time.time() - epoch_start_time)
        print('Epoch average time: {:.4f}'.format(np.mean(np.array(epoch_dur[2:]))))
      if cacher.log:
        miss_rate = cacher.get_miss_rate()
        print('Epoch average miss rate: {:.4f}'.format(miss_rate))
    toc = time.time()
  if rank == 0:
    print(prof.key_averages().table(sort_by='cuda_time_total'))
  print('Total Time: {:.4f}s'.format(toc - tic))
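
The GraphCacheServer used in Example #6 comes from PaGraph and is not reproduced here. The idea it implements is to keep the features of frequently accessed vertices resident in GPU memory and fetch only cache misses from host memory. A minimal, hypothetical sketch of that caching pattern (not PaGraph's actual API):

import torch

class FeatureCache:
    def __init__(self, cpu_feats, hot_nids, device):
        # pin the hottest vertices' features on the GPU once, up front
        self.cpu_feats = cpu_feats                  # (V, F), stays on host
        self.device = device
        hot_nids = hot_nids.cpu()
        self.gpu_feats = cpu_feats[hot_nids].to(device)
        self.gpu_pos = torch.full((cpu_feats.size(0),), -1,
                                  dtype=torch.long, device=device)
        self.gpu_pos[hot_nids.to(device)] = torch.arange(hot_nids.numel(),
                                                         device=device)

    def gather(self, nids):
        # serve cached rows from GPU memory, copy the rest from the host
        nids = nids.to(self.device)
        pos = self.gpu_pos[nids]
        hit = pos >= 0
        out = torch.empty(nids.numel(), self.cpu_feats.size(1),
                          dtype=self.cpu_feats.dtype, device=self.device)
        out[hit] = self.gpu_feats[pos[hit]]
        out[~hit] = self.cpu_feats[nids[~hit].cpu()].to(self.device)
        return out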
Example #7
def trainer(rank, world_size, args, backend='nccl'):
    # init multi process
    init_process(rank, world_size, backend)

    # load data
    dataname = os.path.basename(args.dataset)
    g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
    labels = data.get_labels(args.dataset)
    n_classes = len(np.unique(labels))
    # masks for semi-supervised learning
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    train_nid = np.nonzero(train_mask)[0].astype(np.int64)
    chunk_size = int(train_nid.shape[0] / world_size) - 1
    train_nid = train_nid[chunk_size * rank:chunk_size * (rank + 1)]
    test_nid = np.nonzero(test_mask)[0].astype(np.int64)
    # to torch tensor
    labels = torch.LongTensor(labels)
    train_mask = torch.ByteTensor(train_mask)
    val_mask = torch.ByteTensor(val_mask)
    test_mask = torch.ByteTensor(test_mask)

    # prepare model
    num_hops = args.n_layers if args.preprocess else args.n_layers + 1
    model = GCNSampling(args.feat_size, args.n_hidden, n_classes,
                        args.n_layers, F.relu, args.dropout, args.preprocess)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    model.cuda(rank)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    ctx = torch.device(rank)

    # start training
    epoch_dur = []
    sampler = dgl.contrib.sampling.NeighborSampler(
        g,
        args.batch_size,
        args.num_neighbors,
        neighbor_type='in',
        shuffle=True,
        num_workers=args.num_workers,
        num_hops=num_hops,
        seed_nodes=train_nid,
        prefetch=False)
    profile_begin = time.time()
    with torch.autograd.profiler.profile(enabled=(rank == 0),
                                         use_cuda=True) as prof:
        for epoch in range(args.n_epochs):
            model.train()
            epoch_start_time = time.time()
            step = 0
            for nf in sampler:
                # load-only benchmark: copy NodeFlow data and labels to the
                # GPU without running a forward or backward pass
                with torch.autograd.profiler.record_function('gpu-load'):
                    nf.copy_from_parent(ctx=ctx)
                    batch_nids = nf.layer_parent_nid(-1)
                    label = labels[batch_nids]
                    label = label.cuda(rank, non_blocking=True)
                step += 1
                if rank == 0 and step % 20 == 0:
                    print('epoch [{}] step [{}]'.format(epoch + 1, step))
            if rank == 0:
                epoch_dur.append(time.time() - epoch_start_time)
                print('Epoch average time: {:.4f}'.format(
                    np.mean(np.array(epoch_dur[2:]))))
    print('Total Time: {:.4f}s'.format(time.time() - profile_begin))
    if rank == 0:
        print(prof.key_averages().table(sort_by='cuda_time_total'))
Example #8
def trainer(rank, world_size, args, backend='nccl'):
    # init multi process
    init_process(rank, world_size, backend)

    # load data
    dataname = os.path.basename(args.dataset)
    g = dgl.contrib.graph_store.create_graph_from_store(dataname, "shared_mem")
    labels = data.get_labels(args.dataset)
    n_classes = len(np.unique(labels))
    # masks for semi-supervised learning
    train_mask, val_mask, test_mask = data.get_masks(args.dataset)
    train_nid = np.nonzero(train_mask)[0].astype(np.int64)
    chunk_size = int(train_nid.shape[0] / world_size) - 1
    train_nid = train_nid[chunk_size * rank:chunk_size * (rank + 1)]
    test_nid = np.nonzero(test_mask)[0].astype(np.int64)
    # to torch tensor
    labels = torch.LongTensor(labels)
    train_mask = torch.ByteTensor(train_mask)
    val_mask = torch.ByteTensor(val_mask)
    test_mask = torch.ByteTensor(test_mask)

    # generate synthetic features to isolate data-movement costs
    vnum = 4036538  # hard-coded vertex count of the benchmark graph
    data1 = torch.rand((vnum, args.feat_size))  # node features
    data2 = torch.rand((vnum, 1))  # norm
    # start training
    epoch_dur = []
    num_hops = args.n_layers if args.preprocess else args.n_layers + 1
    sampler = dgl.contrib.sampling.NeighborSampler(
        g,
        args.batch_size,
        args.num_neighbors,
        neighbor_type='in',
        shuffle=True,
        num_workers=args.num_workers,
        num_hops=num_hops,
        seed_nodes=train_nid,
        prefetch=False)
    profile_begin = time.time()
    with torch.autograd.profiler.profile(enabled=(rank == 0),
                                         use_cuda=True) as prof:
        for epoch in range(args.n_epochs):
            epoch_start_time = time.time()
            step = 0
            for nf in sampler:
                datas = []
                for i in range(nf.num_layers):
                    with torch.autograd.profiler.record_function(
                            'index-select'):
                        tnid = nf.layer_parent_nid(i)
                        fdata1 = data1[tnid]
                        fdata2 = data2[tnid]
                    datas.append([fdata1, fdata2])
                    with torch.autograd.profiler.record_function('h2d-load'):
                        fdata1 = fdata1.cuda()
                        fdata2 = fdata2.cuda()
                with torch.autograd.profiler.record_function('index-select'):
                    label = labels[tnid]
                with torch.autograd.profiler.record_function('h2d-load'):
                    label = label.cuda()
                # clear data
                del label
                for d in datas:
                    del d
                step += 1
                if rank == 0 and step % 20 == 0:
                    print('epoch [{}] step [{}]'.format(epoch + 1, step))
            if rank == 0:
                epoch_dur.append(time.time() - epoch_start_time)
                print('Epoch average time: {:.4f}'.format(
                    np.mean(np.array(epoch_dur[2:]))))
    print('Total Time: {:.4f}s'.format(time.time() - profile_begin))
    if rank == 0:
        print(prof.key_averages().table(sort_by='cuda_time_total'))