Example no. 1
0
def main():
    """Entry point: parse arguments, prepare the run directory in the
    parent process, then spawn one worker per process with mp.spawn
    (single-node distributed training only).
    """
    args = Args.from_args()

    # Outside debug mode, distributed training needs at least 2 processes.
    if not args.debug and args.world_size < 2:
        warnings.warn('World size must be larger than 1')
        exit()

    # Seed everything for reproducibility when a seed is given.
    if args.seed is not None:
        utils.reproduction.initialize_seed(args.seed)

    # Raise the open-file-descriptor soft limit to its maximum.
    utils.environment.ulimit_n_max()

    # Filesystem setup happens once here, in the main process, so the
    # spawned workers never race on the run directory.
    args.resolve_continue()
    args.make_run_dir()
    args.save()
    utils.pack_code(args.run_dir)

    # Pick an unused local port for the rendezvous endpoint.
    free_port = utils.distributed.find_free_port()
    dist_url = f'tcp://127.0.0.1:{free_port}'

    print(f'world_size={args.world_size} Using dist_url={dist_url}')

    # NOTE(review): parser is dropped before spawning — presumably it is
    # not picklable across processes; confirm against Args.
    args.parser = None
    # Only single node distributed training is supported.
    mp.spawn(main_worker, args=(args, dist_url), nprocs=args.world_size)
Example no. 2
0
def main():
    """Entry point: parse arguments, set up the run directory, then spawn
    'world_size' worker processes for single-node distributed training.
    """
    args = Args.from_args()

    # Seed everything for reproducibility when a seed is given.
    if args.seed is not None:
        utils.reproduction.initialize_seed(args.seed)

    # Do all filesystem setup here in the main process so the spawned
    # workers cannot race on the run directory.
    args.resolve_continue()
    args.make_run_dir()
    args.save()
    pack_code(args.run_dir)

    # Raise the open-file-descriptor soft limit to its maximum.
    utils.environment.ulimit_n_max()

    # Pick an unused local port for the rendezvous endpoint.
    free_port = utils.distributed.find_free_port()
    dist_url = f'tcp://127.0.0.1:{free_port}'

    print(f'world_size={args.world_size} Using dist_url={dist_url}')

    # Only a single node is considered here; 'world_size' is the number
    # of processes to spawn.
    # NOTE(review): parser is dropped before spawning — presumably it is
    # not picklable across processes; confirm against Args.
    args.parser = None
    mp.spawn(main_worker, args=(args, dist_url), nprocs=args.world_size)