Example #1
    def open(self):
        """Initiate and scale the cluster"""

        # initiate the cluster object
        # Look at the ~/.config/dask/mintpy.yaml file for changing the Dask configuration defaults
        print('initiate Dask cluster')
        if self.cluster_type == 'local':
            from dask.distributed import LocalCluster

            # initiate cluster object
            self.cluster = LocalCluster()

        else:
            # for a non-local cluster, import the related dask module only when it is needed,
            # because dask_jobqueue is not available on macports, which makes sense
            import dask_jobqueue

            # initiate cluster object
            if self.cluster_type == 'lsf':
                self.cluster = dask_jobqueue.LSFCluster(**self.cluster_kwargs)

            elif self.cluster_type == 'pbs':
                self.cluster = dask_jobqueue.PBSCluster(**self.cluster_kwargs)

            elif self.cluster_type == 'slurm':
                self.cluster = dask_jobqueue.SLURMCluster(
                    **self.cluster_kwargs)

            else:
                msg = 'unrecognized input cluster: {}'.format(
                    self.cluster_type)
                msg += '\nsupported clusters: {}'.format(CLUSTER_LIST)
                raise ValueError(msg)

            # show dask cluster job script for reference
            print("\n", self.cluster.job_script())
            # for debug
            debug_mode = False
            if debug_mode:
                with open('dask_command_run_from_python.txt', 'w') as f:
                    f.write(self.cluster.job_script() + '\n')

        # Submit num_worker jobs to the cluster to start the workers.
        # In tests on the Pegasus `general` queue in Jan 2019, no more than 40 workers could run
        # at once (other users' jobs gained higher priority in the general queue at that point).
        print('scale the cluster to {} workers'.format(self.num_worker))
        self.cluster.scale(self.num_worker)
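The method above starts and scales the cluster but never attaches a client to it. A minimal sketch of how a caller might connect and later tear everything down, assuming a `cluster_obj` instance of the class above (the variable name is illustrative, not from the source):

    from dask.distributed import Client

    cluster_obj.open()                    # start and scale the cluster as above
    client = Client(cluster_obj.cluster)  # attach a client to the running cluster
    print(client)                         # reports scheduler address and worker count
    client.close()                        # drop the scheduler connection when done
    cluster_obj.cluster.close()           # shut the cluster itself down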
Example #2
    def open(self):
        """Initiate the cluster"""

        # initiate the cluster object
        # Look at the ~/.config/dask/mintpy.yaml file for changing the Dask configuration defaults
        print('initiate Dask cluster')
        if self.cluster_type == 'local':
            from dask.distributed import LocalCluster

            # initiate cluster object
            self.cluster = LocalCluster()

        else:
            # for a non-local cluster, import the related dask module only when it is needed,
            # because dask_jobqueue is not available on macports, which makes sense
            import dask_jobqueue

            # initiate cluster object
            if self.cluster_type == 'lsf':
                self.cluster = dask_jobqueue.LSFCluster(**self.cluster_kwargs)

            elif self.cluster_type == 'pbs':
                self.cluster = dask_jobqueue.PBSCluster(**self.cluster_kwargs)

            elif self.cluster_type == 'slurm':
                self.cluster = dask_jobqueue.SLURMCluster(
                    **self.cluster_kwargs)

            else:
                msg = 'unrecognized input cluster: {}'.format(
                    self.cluster_type)
                msg += '\nsupported clusters: {}'.format(CLUSTER_LIST)
                raise ValueError(msg)

            # show dask cluster job script for reference
            print("\n", self.cluster.job_script())
            # for debug
            debug_mode = False
            if debug_mode:
                with open('dask_command_run_from_python.txt', 'w') as f:
                    f.write(self.cluster.job_script() + '\n')
Example #3
    parser = argparse.ArgumentParser()
    # argparse's type=bool is a pitfall (bool("False") is True), so the
    # on/off switch is expressed as a store_true flag instead
    parser.add_argument(
        "--use_slurm",
        action="store_true",
        help=(
            "use a SLURM cluster instead of running locally. If set, make sure"
            " you run this script with sbatch, so the scheduler is on the same"
            " network as the worker nodes."
        ),
    )
    args = parser.parse_args()

    if args.use_slurm:
        cluster = dask_jobqueue.SLURMCluster(
            cores=4,
            processes=4,
            memory="2GB",
            walltime="0:00:05",
            queue="all",
            local_directory="/tmp/",
            interface="em2",  # network interface for scheduler-worker traffic
        )
        cluster.scale(16)  # request 16 worker processes; scale() returns without waiting

        # Print a link to the HTTP diagnostics server
        print("Dashboard link: {0}".format(cluster.dashboard_link))
    else:
        cluster = dask.distributed.LocalCluster(processes=False, dashboard_address=None)
    client = dask.distributed.Client(cluster)

    tuner = hyperband.Hyperband(
        get_hyperparameter_configuration,
        run_then_return_val_loss,
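A caveat on the snippet above: `cluster.scale(16)` only submits the request and returns immediately; it does not wait for the workers to arrive. A minimal sketch of actually blocking until they register, using `Client.wait_for_workers` (the SLURMCluster arguments are illustrative):

    import dask.distributed
    import dask_jobqueue

    cluster = dask_jobqueue.SLURMCluster(cores=4, processes=4, memory="2GB", queue="all")
    cluster.scale(16)                          # request 16 worker processes; returns at once
    client = dask.distributed.Client(cluster)
    client.wait_for_workers(16)                # block until 16 workers have connected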
Example #4
    import os

    import dask_jobqueue
    from dask.distributed import Client, LocalCluster

    opts = ga_opts()

    # perform safe checks prior to any calculation
    safe_checks()

    # clean the working directory
    if opts['continue'] == '1':
        clean()

    # create SLURM Cluster if available; if not, use multiprocessing
    slurm = os.getenv('SLURM_JOB_PARTITION', None)

    if slurm is not None:
        cluster = dask_jobqueue.SLURMCluster(
            queue=os.environ['SLURM_JOB_PARTITION'],
            cores=1,
            walltime='0',
            memory=opts['memory'],
            local_directory=os.getenv('TMPDIR', '/tmp'))
        cluster.adapt(minimum_jobs=opts['min'], maximum_jobs=opts['max'])
        client = Client(cluster)

    else:
        cluster = LocalCluster(n_workers=opts['max'],
                               processes=True,
                               threads_per_worker=1,
                               local_directory=os.getenv('TMPDIR', '/tmp'))
        client = Client(cluster)

    # read model configuration
    parameters = configurate()
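Neither branch above actually submits any work to the cluster it builds. A minimal sketch of how per-individual tasks might be fanned out through the client, assuming a hypothetical fitness function (`fitness` and the toy population are not part of the original script):

    def fitness(individual):
        # hypothetical stand-in for the real per-individual evaluation
        return sum(individual)

    population = [[1, 2], [3, 4], [5, 6]]
    futures = client.map(fitness, population)  # one dask task per individual
    scores = client.gather(futures)            # adapt() grows or shrinks the job count to match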
Example #5
    import os

    import dask_jobqueue
    from dask.distributed import Client

    # perform safe checks prior to any calculation
    safe_checks()

    # clean the working directory
    clean()

    # create SLURM Cluster if available
    slurm = os.environ.get('SLURM_JOB_PARTITION')

    if slurm:
        cluster = dask_jobqueue.SLURMCluster(
            queue=os.environ['SLURM_JOB_PARTITION'],
            cores=1,
            # note: SLURM_MEM_PER_CPU is typically a plain megabyte count; dask
            # parses a unitless string as bytes, so appending 'MB' may be needed
            memory=os.environ['SLURM_MEM_PER_CPU'],
            local_directory=os.getenv('TMPDIR', '/tmp'))

        client = Client(cluster)
        # start_workers() has been removed from dask_jobqueue; scale() is the
        # current way to request this many worker jobs
        cluster.scale(opts['ntasks'])

    # read model configuration
    parameters = configurate()

    # Sterope Main Algorithm
    population = populate()
    # simulate levels
    simulate()
    # evaluate sensitivity
    sensitivity = evaluate()
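None of the snippets above shut their cluster down; on a batch system the worker jobs then sit in the queue or hold their allocations until walltime expires. A minimal teardown sketch, assuming the `client` and `cluster` objects from the snippet above:

    client.close()   # disconnect from the scheduler
    cluster.close()  # cancel the queued/running dask worker jobs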