Example 1
def _init_mpi():
    """provides a way to manually set the thread init mode for MPI if necessary.
    Needs to happen as early as possible, otherwise mpi4py might auto-init somewhere else.
    """
    import os

    try:
        import mpi4py
    except ImportError:
        return
    # only change finalize setting if unset
    finalize = (mpi4py.rc.finalize is None) or mpi4py.rc.finalize
    mpi4py.rc(initialize=False, finalize=finalize)
    from mpi4py import MPI
    if not MPI.Is_initialized():
        required_level = int(
            os.environ.get('PYMOR_MPI_INIT_THREAD', MPI.THREAD_MULTIPLE))
        supported_lvl = MPI.Init_thread(required_level)
        if supported_lvl < required_level:
            print(
                f'MPI does not support threading level {required_level}, running with {supported_lvl} instead',
                flush=True)
    try:
        # initializing petsc4py here prevents sporadic MPI calls after finalize
        import petsc4py
        petsc4py.init()
    except ImportError:
        return
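A minimal sketch, assuming only that mpi4py is installed, of the same deferred-init pattern with an explicit check of the thread level actually granted; the PYMOR_MPI_INIT_THREAD variable name is taken from the example above, everything else is illustrative.

import os

import mpi4py
mpi4py.rc.initialize = False  # defer init so the thread level can be chosen explicitly
from mpi4py import MPI

requested = int(os.environ.get('PYMOR_MPI_INIT_THREAD', MPI.THREAD_MULTIPLE))
granted = MPI.Init_thread(requested)
if granted < requested:
    # the MPI library may silently grant a lower thread support level
    print(f'requested thread level {requested}, running with {granted}', flush=True)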
Example 2
def start_mpi(block_nonroot_stdout=True):
    """
    Check if MPI has already been initialized. If so, just set the communicators,
    Npus, and rank variables.

    Parameters
    ----------

    block_nonroot_stdout : bool (True)
        Redirect stdout on nonzero ranks to /dev/null, for cleaner output.

    """
    global world_comm, node_comm, rank_comm, rank, Npus
    if not MPI.Is_initialized():
        MPI.Init_thread(MPI.THREAD_MULTIPLE)
        atexit.register(MPI.Finalize)
    world_comm = MPI.COMM_WORLD
    node_comm = world_comm.Split_type(MPI.COMM_TYPE_SHARED)
    rank_comm = world_comm.Split(color=node_comm.rank)

    Npus = world_comm.Get_size()
    rank = world_comm.Get_rank()
    set_mpi_excepthook(world_comm)

    world_comm.Barrier()

    if rank != 0 and block_nonroot_stdout:  # pragma: no cover
        # For non-root ranks, do not print to stdout.
        # (Uncovered until we have multi-rank tests)
        sys.stdout = open('/dev/null', 'w')
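A small sketch, assuming the same split pattern as above, that makes the three communicators visible: Split_type with COMM_TYPE_SHARED groups ranks sharing a node, while the color split groups the n-th rank of every node together (all variable names below are illustrative).

from mpi4py import MPI

world = MPI.COMM_WORLD
node = world.Split_type(MPI.COMM_TYPE_SHARED)  # ranks on the same shared-memory node
cross = world.Split(color=node.rank)           # same node-local rank across nodes

print(f'global {world.rank}/{world.size}  '
      f'node-local {node.rank}/{node.size}  '
      f'cross-node {cross.rank}/{cross.size}', flush=True)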
Example 3
    def main(self):
        MPI.Init_thread(MPI.THREAD_MULTIPLE)
        if MPI.Query_thread() != MPI.THREAD_MULTIPLE:
            print('ERROR: make sure MPI is configured with thread support')
            self.terminate()

        self.read_commandline()
        self.init_logger()
        self.show_banner()
        self.list_solvers()
        self.init_defaults()
        if self.read_controlfile() and self.load_solver():
            self.initialise_solver()
            self.start_solver()
Example 4
    def set_level(cls, level):
        if cls.__LEVEL is not None:
            raise ParallelismAlreadySet(
                'Cannot reset the parallelism level when it has already been set.'
            )
        if level not in cls.__LEVELS:
            raise ValueError(
                f'Unrecognized parallelism option! Valid choices are {cls.__LEVELS}'
            )

        cls.__LEVEL = level
        if level == LEVEL_2:
            MPI.Init_thread()
            atexit.register(MPI.Finalize)
Example 5
    def __init__(
        self,
        run_function,
        num_workers: int = None,
        callbacks=None,
        run_function_kwargs=None,
        comm=None,
    ):
        super().__init__(run_function, num_workers, callbacks,
                         run_function_kwargs)
        if not MPI.Is_initialized():
            MPI.Init_thread()
        self.comm = comm if comm else MPI.COMM_WORLD
        self.num_workers = self.comm.Get_size() - 1  # 1 rank is the master
        self.sem = asyncio.Semaphore(self.num_workers)
        logging.info(
            f"Creating MPIPoolExecutor with {self.num_workers} max_workers...")
        self.executor = MPIPoolExecutor(max_workers=self.num_workers)
        logging.info("Creation of MPIPoolExecutor done")
Example 6
from mpi4py import rc
rc.initialize = False

from mpi4py import MPI
assert not MPI.Is_initialized()
assert not MPI.Is_finalized()

MPI.Init_thread()
assert MPI.Is_initialized()
assert not MPI.Is_finalized()

import sys
name, _ = MPI.get_vendor()
if name == 'MPICH':
    assert MPI.Query_thread() == MPI.THREAD_MULTIPLE
if name == 'MPICH2' and sys.platform[:3] != 'win':
    assert MPI.Query_thread() == MPI.THREAD_MULTIPLE

MPI.Finalize()
assert MPI.Is_initialized()
assert MPI.Is_finalized()
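Example 2 above registers MPI.Finalize with atexit instead of calling it inline; combining that with the deferred-init pattern from this example gives a small, library-friendly sketch (the only assumption is an installed mpi4py).

import atexit

import mpi4py
mpi4py.rc.initialize = False
from mpi4py import MPI

if not MPI.Is_initialized():
    MPI.Init_thread()
    atexit.register(MPI.Finalize)

print(f'rank {MPI.COMM_WORLD.Get_rank()} initialized, '
      f'thread level {MPI.Query_thread()}', flush=True)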
Example 7
    def __init__(self):
        if not MPI.Is_initialized():
            MPI.Init_thread()
        self.comm = MPI.COMM_WORLD
        self.size = self.comm.size
        self.rank = self.comm.rank
Example 8

import sys, copy, time, os
import libessc as essc

#import testlib as essc

import numpy as np
from mpi4py import rc
rc.initialize = False

from mpi4py import MPI
assert not MPI.Is_initialized()
assert not MPI.Is_finalized()

MPI.Init_thread(MPI.THREAD_MULTIPLE)
assert MPI.Is_initialized()
assert not MPI.Is_finalized()

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
world_Size = comm.Get_size()
thisHost = os.getenv('HOSTNAME')
if rank == 0:
    print(Version())
    print(Description)
    essc.printInfo()

# Variables
seqFiles = []
ABseqFiles = []
Example 9
    def __init__(
        self,
        problem,
        run_function,
        random_state: int = None,
        log_dir: str = ".",
        verbose: int = 0,
        comm=None,
        run_function_kwargs: dict = None,
        n_jobs: int = 1,
        surrogate_model: str = "RF",
        surrogate_model_kwargs: dict = None,
        n_initial_points: int = 10,
        lazy_socket_allocation: bool = False,
        communication_batch_size=2048,
        sync_communication: bool = False,
        sync_communication_freq: int = 10,
        checkpoint_file: str = "results.csv",
        checkpoint_freq: int = 1,
        acq_func: str = "UCB",
        acq_optimizer: str = "auto",
        kappa: float = 1.96,
        xi: float = 0.001,
        sample_max_size: int = -1,
        sample_strategy: str = "quantile",
    ):

        # get the __init__ parameters
        self._init_params = locals()
        self._call_args = []

        self._problem = problem
        self._run_function = run_function
        self._run_function_kwargs = ({} if run_function_kwargs is None else
                                     run_function_kwargs)

        if type(random_state) is int:
            self._seed = random_state
            self._random_state = np.random.RandomState(random_state)
        elif isinstance(random_state, np.random.RandomState):
            self._random_state = random_state
        else:
            self._random_state = np.random.RandomState()

        # Create the logging directory if it does not exist
        self._log_dir = os.path.abspath(log_dir)
        pathlib.Path(log_dir).mkdir(parents=False, exist_ok=True)

        self._verbose = verbose

        # mpi
        if not MPI.Is_initialized():
            MPI.Init_thread()
        self._comm = comm if comm else MPI.COMM_WORLD
        self._rank = self._comm.Get_rank()
        self._size = self._comm.Get_size()
        self._communication_batch_size = communication_batch_size
        logging.info(f"DMBSMPI has {self._size} worker(s)")

        # force socket allocation with dummy message to reduce overhead
        if not lazy_socket_allocation:
            logging.info("Initializing communication...")
            ti = time.time()
            logging.info("Sending to all...")
            t1 = time.time()
            req_send = [
                self._comm.isend(None, dest=i, tag=TAG_INIT)
                for i in range(self._size) if i != self._rank
            ]
            MPI.Request.waitall(req_send)
            logging.info(f"Sending to all done in {time.time() - t1:.4f} sec.")

            logging.info("Receiving from all...")
            t1 = time.time()
            req_recv = [
                self._comm.irecv(source=i, tag=TAG_INIT)
                for i in range(self._size) if i != self._rank
            ]
            MPI.Request.waitall(req_recv)
            logging.info(
                f"Receiving from all done in {time.time() - t1:.4f} sec.")
            logging.info(
                f"Initializing communications done in {time.time() - ti:.4f} sec."
            )

        # sync communication management
        self._sync_communication = sync_communication
        self._sync_communication_freq = sync_communication_freq

        # checkpointing
        self._checkpoint_size = 0
        self._checkpoint_file = checkpoint_file
        self._checkpoint_freq = checkpoint_freq

        # set random state for given rank
        self._rank_seed = self._random_state.randint(
            low=0, high=2**32, size=self._size)[self._rank]

        self._timestamp = time.time()

        self._history = History()

        if acq_optimizer == "auto":
            if acq_func == "qUCB":
                acq_optimizer = "sampling"
            else:
                acq_optimizer = "boltzmann_sampling"

        if acq_func == "qUCB":
            kappa = self._random_state.exponential(kappa,
                                                   size=self._size)[self._rank]
            acq_func = "UCB"

        # check if it is possible to convert the ConfigSpace to standard skopt Space
        if (isinstance(self._problem.space, CS.ConfigurationSpace)
                and len(self._problem.space.get_forbiddens()) == 0
                and len(self._problem.space.get_conditions()) == 0):
            self._opt_space = convert_to_skopt_space(self._problem.space)
        else:
            self._opt_space = self._problem.space

        self._opt = None
        self._opt_kwargs = dict(
            dimensions=self._opt_space,
            base_estimator=self._get_surrogate_model(
                surrogate_model,
                surrogate_model_kwargs,
                n_jobs,
            ),
            acq_func=MAP_acq_func.get(acq_func, acq_func),
            acq_func_kwargs={
                "xi": xi,
                "kappa": kappa
            },
            acq_optimizer=acq_optimizer,
            acq_optimizer_kwargs={
                "n_points": 10000,
                "boltzmann_gamma": 1,
                # "boltzmann_psucc": 1/self._size,
                "n_jobs": n_jobs,
            },
            n_initial_points=n_initial_points,
            random_state=self._rank_seed,
            sample_max_size=sample_max_size,
            sample_strategy=sample_strategy,
        )
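The eager socket-allocation handshake above can be isolated into a short sketch, assuming mpi4py and an arbitrary TAG_INIT value; every rank exchanges an empty message with every other rank so connections exist before timing-sensitive work starts.

from mpi4py import MPI

TAG_INIT = 0  # illustrative tag value; the class above defines its own TAG_INIT

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

send_reqs = [comm.isend(None, dest=i, tag=TAG_INIT)
             for i in range(size) if i != rank]
recv_reqs = [comm.irecv(source=i, tag=TAG_INIT)
             for i in range(size) if i != rank]
MPI.Request.waitall(send_reqs)
MPI.Request.waitall(recv_reqs)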
Example 10
def main(
    cfg: AAEModelConfig,
    encoder_gpu: int,
    generator_gpu: int,
    discriminator_gpu: int,
    distributed: bool,
):

    # Do some scaffolding for DDP
    comm_rank = 0
    comm_size = 1
    comm = None
    if distributed and dist.is_available():

        import mpi4py

        mpi4py.rc.initialize = False
        from mpi4py import MPI  # noqa: E402

        MPI.Init_thread()

        # get communicator: duplicate from comm world
        comm = MPI.COMM_WORLD.Dup()

        # now match ranks between the mpi comm and the nccl comm
        os.environ["WORLD_SIZE"] = str(comm.Get_size())
        os.environ["RANK"] = str(comm.Get_rank())

        # init pytorch
        dist.init_process_group(backend="nccl", init_method="env://")
        comm_rank = dist.get_rank()
        comm_size = dist.get_world_size()

    model_hparams = AAE3dHyperparams(
        num_features=cfg.num_features,
        encoder_filters=cfg.encoder_filters,
        encoder_kernel_sizes=cfg.encoder_kernel_sizes,
        generator_filters=cfg.generator_filters,
        discriminator_filters=cfg.discriminator_filters,
        latent_dim=cfg.latent_dim,
        encoder_relu_slope=cfg.encoder_relu_slope,
        generator_relu_slope=cfg.generator_relu_slope,
        discriminator_relu_slope=cfg.discriminator_relu_slope,
        use_encoder_bias=cfg.use_encoder_bias,
        use_generator_bias=cfg.use_generator_bias,
        use_discriminator_bias=cfg.use_discriminator_bias,
        noise_mu=cfg.noise_mu,
        noise_std=cfg.noise_std,
        lambda_rec=cfg.lambda_rec,
        lambda_gp=cfg.lambda_gp,
    )

    # optimizers
    optimizer_hparams = OptimizerHyperparams(name=cfg.optimizer_name,
                                             hparams={"lr": cfg.optimizer_lr})

    # Save hparams to disk, load initial weights, and create the virtual h5 file
    if comm_rank == 0:
        cfg.output_path.mkdir(exist_ok=True)
        model_hparams.save(cfg.output_path.joinpath("model-hparams.json"))
        optimizer_hparams.save(
            cfg.output_path.joinpath("optimizer-hparams.json"))
        init_weights = get_init_weights(cfg)
        h5_file, h5_files = get_h5_training_file(cfg)
        with open(cfg.output_path.joinpath("virtual-h5-metadata.json"),
                  "w") as f:
            json.dump(h5_files, f)

    else:
        init_weights, h5_file = None, None

    if comm_size > 1:
        init_weights = comm.bcast(init_weights, 0)
        h5_file = comm.bcast(h5_file, 0)

    # construct model
    aae = AAE3d(
        cfg.num_points,
        cfg.num_features,
        cfg.batch_size,
        model_hparams,
        optimizer_hparams,
        gpu=(encoder_gpu, generator_gpu, discriminator_gpu),
        init_weights=init_weights,
    )

    enc_device = torch.device(f"cuda:{encoder_gpu}")
    if comm_size > 1:
        if (encoder_gpu == generator_gpu) and (encoder_gpu
                                               == discriminator_gpu):
            aae.model = DDP(aae.model,
                            device_ids=[enc_device],
                            output_device=enc_device)
        else:
            aae.model = DDP(aae.model, device_ids=None, output_device=None)

    # set global default device
    torch.cuda.set_device(enc_device.index)

    if comm_rank == 0:
        # Display model
        print(aae)

    assert isinstance(h5_file, Path)
    # set up dataloaders
    train_dataset = get_dataset(
        cfg.dataset_location,
        h5_file,
        cfg.dataset_name,
        cfg.rmsd_name,
        cfg.fnc_name,
        cfg.num_points,
        cfg.num_features,
        split="train",
        shard_id=comm_rank,
        num_shards=comm_size,
        normalize="box",
        cms_transform=False,
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=cfg.batch_size,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
        num_workers=cfg.num_data_workers,
    )

    valid_dataset = get_dataset(
        cfg.dataset_location,
        h5_file,
        cfg.dataset_name,
        cfg.rmsd_name,
        cfg.fnc_name,
        cfg.num_points,
        cfg.num_features,
        split="valid",
        shard_id=comm_rank,
        num_shards=comm_size,
        normalize="box",
        cms_transform=False,
    )

    valid_loader = DataLoader(
        valid_dataset,
        batch_size=cfg.batch_size,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
        num_workers=cfg.num_data_workers,
    )

    print(
        f"Using {len(train_dataset)} training and {len(valid_dataset)} validation samples."
    )

    wandb_config = setup_wandb(cfg, aae.model, comm_rank)

    # Optional callbacks
    loss_callback = LossCallback(cfg.output_path.joinpath("loss.json"),
                                 wandb_config=wandb_config,
                                 mpi_comm=comm)

    checkpoint_callback = CheckpointCallback(
        out_dir=cfg.output_path.joinpath("checkpoint"), mpi_comm=comm)

    save_callback = SaveEmbeddingsCallback(
        out_dir=cfg.output_path.joinpath("embeddings"),
        interval=cfg.embed_interval,
        sample_interval=cfg.sample_interval,
        mpi_comm=comm,
    )

    # TSNEPlotCallback requires SaveEmbeddingsCallback to run first
    tsne_callback = TSNEPlotCallback(
        out_dir=cfg.output_path.joinpath("embeddings"),
        projection_type="3d",
        target_perplexity=100,
        interval=cfg.tsne_interval,
        tsne_is_blocking=True,
        wandb_config=wandb_config,
        mpi_comm=comm,
    )

    # Train model with callbacks
    callbacks = [
        loss_callback,
        checkpoint_callback,
        save_callback,
        tsne_callback,
    ]

    # Optionally train for a different number of
    # epochs on the first DDMD iteration
    if cfg.stage_idx == 0:
        epochs = cfg.initial_epochs
    else:
        epochs = cfg.epochs

    aae.train(train_loader, valid_loader, epochs, callbacks=callbacks)

    # Save loss history to disk.
    if comm_rank == 0:
        loss_callback.save(cfg.output_path.joinpath("loss.json"))

        # Save final model weights to disk
        aae.save_weights(
            cfg.output_path.joinpath("encoder-weights.pt"),
            cfg.output_path.joinpath("generator-weights.pt"),
            cfg.output_path.joinpath("discriminator-weights.pt"),
        )