def main():
    """Entry point: parse CLI args, initialize BytePS, and launch the worker."""
    args = parser.parse_args()
    bps.init()

    if args.seed is not None:
        # Deterministic mode: seed RNGs and pin cuDNN to deterministic kernels.
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # World size may come from the launcher environment when using env:// init.
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    ngpus_per_node = torch.cuda.device_count()

    if args.multiprocessing_distributed:
        # One process per GPU: scale the global world size accordingly and
        # bind this process to its BytePS local rank.
        args.world_size = ngpus_per_node * args.world_size
        args.gpu = bps.local_rank()

    # Both the distributed and the plain path end up in the same worker call.
    main_worker(args.gpu, ngpus_per_node, args)
def main():
    """Initialize BytePS, build the UGATIT model, and run the requested phase."""
    bps.init()

    # Fixed seeds for reproducibility; pin this process to its local GPU.
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    torch.cuda.set_device(bps.local_rank())

    # parse arguments
    args = parse_args()
    if args is None:
        exit()

    # open session and build graph
    model = UGATIT(args)
    model.build_model()

    if args.phase == 'train':
        model.train()
        print(" [*] Training finished!")
    if args.phase == 'test':
        model.test()
        print(" [*] Test finished!")
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call — the opening of that call is outside this view.
default=1000, help='number of benchmark iterations')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA')
# NOTE(review): argparse `type=bool` is a known pitfall — any non-empty string
# (including "False") parses as True, so a supplied --no-wait value is
# effectively always True; confirm the intended behavior with the callers.
parser.add_argument('--no-wait', type=bool, default=True,
                    help='wait for other worker request first')
parser.add_argument('--gpu', type=int, default=-1,
                    help='use a specified gpu')

args = parser.parse_args()
# Use CUDA only when available and not explicitly disabled on the CLI.
args.cuda = not args.no_cuda and torch.cuda.is_available()

hvd.init()

# Horovod: pin GPU to local rank.
if args.gpu >= 0:
    # An explicit --gpu index overrides the rank-based assignment.
    torch.cuda.set_device(args.gpu)
else:
    torch.cuda.set_device(hvd.local_rank())

cudnn.benchmark = True


def log(s, nl=True):
    """Print *s* on Horovod rank 0 only; nl=False suppresses the newline."""
    if hvd.rank() != 0:
        return
    print(s, end='\n' if nl else '')
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call — the opening of that call is outside this view.
help='number of warmup epochs')
parser.add_argument('--momentum', type=float, default=0.9,
                    help='SGD momentum')
parser.add_argument('--wd', type=float, default=0.00005,
                    help='weight decay')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=42,
                    help='random seed')

args = parser.parse_args()
# Use CUDA only when available and not explicitly disabled on the CLI.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Effective per-step batch: gradients are averaged over this many samples
# before each allreduce.
allreduce_batch_size = args.batch_size * args.batches_per_allreduce

bps.init()
torch.manual_seed(args.seed)

if args.cuda:
    # BytePS: pin GPU to local rank.
    torch.cuda.set_device(bps.local_rank())
    torch.cuda.manual_seed(args.seed)

cudnn.benchmark = True

# If set > 0, will resume training from a given checkpoint.
# Scan from the last epoch downward and pick the newest checkpoint on disk.
resume_from_epoch = 0
for try_epoch in range(args.epochs, 0, -1):
    if os.path.exists(args.checkpoint_format.format(epoch=try_epoch)):
        resume_from_epoch = try_epoch
        break