def main():  # noqa: D103
    os.makedirs(output_dir, exist_ok=True)

    cluster = dask_jobqueue.PBSCluster(
        name="STR_subset",
        walltime="48:00:00",
        log_directory=output_dir
    )
    cluster.scale(22)
    client = dask.distributed.Client(cluster)

    jobs = []
    for chrom in range(1, 23):
        jobs.append(subset_strs(chrom))

    futures = client.compute(jobs)

    with open(f"{output_dir}/results.txt", 'w') as results_file:
        for chrom, future in enumerate(futures, 1):
            result = future.result()
            if result is None:
                results_file.write(f"chrom {chrom} succeeded\n\n")
            else:
                results_file.write(
                    f"chrom {chrom} failed. Error: {result}\n\n"
                )
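
# Not shown in this snippet: module-level imports of os, dask, dask.distributed
# and dask_jobqueue, a module-level output_dir, and the subset_strs function
# itself. main() treats subset_strs(chrom) as a lazy Dask task whose result is
# None on success or an error message on failure. A minimal, hypothetical sketch
# of such a function (the body is a placeholder, not the original code):

@dask.delayed
def subset_strs(chrom):
    """Subset the STR calls for one chromosome.

    Returns None on success or an error string on failure, matching how
    main() interprets the computed result.
    """
    try:
        # ... the real per-chromosome subsetting work would go here ...
        return None
    except Exception as err:
        return str(err)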
Example #2
    def open(self):
        """Initiate and scale the cluster"""

        # initiate the cluster object
        # Look at the ~/.config/dask/mintpy.yaml file for changing the Dask configuration defaults
        print('initiate Dask cluster')
        if self.cluster_type == 'local':
            from dask.distributed import LocalCluster

            # initiate cluster object
            self.cluster = LocalCluster()

        else:
            # for non-local cluster, import related dask module only when it's needed
            # because dask_jobqueue is not available on macports, which makes sense
            import dask_jobqueue

            # initiate cluster object
            if self.cluster_type == 'lsf':
                self.cluster = dask_jobqueue.LSFCluster(**self.cluster_kwargs)

            elif self.cluster_type == 'pbs':
                self.cluster = dask_jobqueue.PBSCluster(**self.cluster_kwargs)

            elif self.cluster_type == 'slurm':
                self.cluster = dask_jobqueue.SLURMCluster(
                    **self.cluster_kwargs)

            else:
                msg = 'unrecognized input cluster: {}'.format(
                    self.cluster_type)
                msg += '\nsupported clusters: {}'.format(CLUSTER_LIST)
                raise ValueError(msg)

            # show dask cluster job script for reference
            print("\n", self.cluster.job_script())
            # for debug
            debug_mode = False
            if debug_mode:
                with open('dask_command_run_from_python.txt', 'w') as f:
                    f.write(self.cluster.job_script() + '\n')

        # This line submits num_worker jobs to the cluster to start a bunch of workers.
        # In tests on the Pegasus `general` queue in Jan 2019, no more than 40 workers could RUN
        # at once (other users' jobs gained higher priority in the general queue at that point).
        print('scale the cluster to {} workers'.format(self.num_worker))
        self.cluster.scale(self.num_worker)
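
# For reference, a minimal, hypothetical example of the kind of cluster_kwargs
# this method expects. The keys below are standard dask_jobqueue keyword
# arguments; the values are illustrative, not taken from the original project's
# ~/.config/dask/mintpy.yaml defaults:

example_cluster_kwargs = {
    'cores': 2,              # CPU cores per submitted job
    'memory': '4GB',         # memory per submitted job
    'walltime': '00:30:00',  # scheduler walltime limit per job
    'queue': 'general',      # queue/partition to submit to
}
# e.g. dask_jobqueue.SLURMCluster(**example_cluster_kwargs) accepts these keys.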
Example #3
def main():  # noqa: D103
    output_dir = f'{os.environ["UKB"]}/exome/test_output'

    cluster = dask_jobqueue.PBSCluster(name="test",
                                       walltime="4:00:00",
                                       log_directory=output_dir,
                                       queue="condo")
    # Maximum of 10 concurrent downloads per application
    # See here: https://biobank.ctsu.ox.ac.uk/showcase/refer.cgi?id=644
    cluster.adapt(minimum_jobs=10, maximum_jobs=10)
    client = dask.distributed.Client(cluster)

    jobs = set()
    jobs.add(fail())

    futures = client.compute(jobs, retries=1)

    with open(f"{output_dir}/results.txt", 'w') as results_file:
        for future in futures:
            future.result()  # block till code is done executing
            results_file.write(f"{future.key} succeeded\n")
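
# Not shown in this snippet: module-level imports of os, dask, dask.distributed
# and dask_jobqueue, and the fail() function itself, which presumably returns a
# lazy Dask task that raises, to exercise the retries=1 handling. A minimal,
# hypothetical sketch (not the original implementation):

@dask.delayed
def fail():
    """A task that always raises, for testing cluster setup and retry logic."""
    raise RuntimeError("intentional test failure")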
Example #5
def main():  # noqa: D103
    parser = argparse.ArgumentParser()
    parser.add_argument("pipeline_name", choices={'fe', 'spb'})
    parser.add_argument("bulk_file",
                        help="name of a file in the exome directory")
    args = parser.parse_args()
    vcf_dir = f'{ukb}/exome/{args.pipeline_name}_vcfs'
    output_dir = f'{vcf_dir}_output'
    bulk_floc = f'{ukb}/exome/{args.bulk_file}'

    assert os.path.exists(vcf_dir)
    assert os.path.exists(bulk_floc)

    current_files = set(os.listdir(vcf_dir))
    cluster = dask_jobqueue.PBSCluster(
        name="UKB_gVCF_download",
        walltime="4:00:00",
        log_directory=output_dir,
        queue="condo"
    )
    # Maximum of 10 concurrent downloads per application
    # See here: https://biobank.ctsu.ox.ac.uk/showcase/refer.cgi?id=644
    cluster.adapt(minimum_jobs=10, maximum_jobs=10)
    client = dask.distributed.Client(cluster)

    jobs = set()
    # submit one download job per bulk-file entry that hasn't been downloaded yet
    with open(bulk_floc) as bulk_file:
        for line in bulk_file:
            sample_ID, field_ID = line.split()
            if field_ID in {'23176_0_0', '23161_0_0'}:
                suffix = 'gz'
            elif field_ID in {'23177_0_0', '23162_0_0'}:
                suffix = 'tbi'
            else:
                raise ValueError(f"Unexpected field ID: {field_ID}")
            file_name = f"{sample_ID}_{field_ID}.{suffix}"
            if file_name in current_files:
                continue
            jobs.add(client.submit(
                download_item, sample_ID, field_ID, vcf_dir,
                key=f'download_item-{sample_ID}-{field_ID}'
            ))

    print(f"Number of jobs queued: {len(jobs)}")
    retried_keys = set()

    now = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    with open(f"{output_dir}/results_{now}.txt", 'w') as results_file:
        for future in dask.distributed.as_completed(jobs):
            key = future.key

            err = future.exception()
            if err:
                print(f"{key} failed with raised error. Error: {err}",
                      file=sys.stderr)
                if key in retried_keys:
                    print(f"{key} was already retried.", file=sys.stderr)
                    sys.exit(1)
                else:
                    retried_keys.add(key)
                    future.retry()
                    continue

            result = future.result()
            if result is None:
                results_file.write(f"{key} succeeded\n")
            else:
                results_file.write(f"{key} failed. Error: {result}\n\n")
                if key in retried_keys:
                    print(f"{key} was already retried.", file=sys.stderr)
                    sys.exit(1)
                else:
                    retried_keys.add(key)
                    future.retry()
                    continue

            # make sure to mark the future as cancelled so it is not rerun
            # even if the job it was on dies unexpectedly and is restarted
            future.cancel()
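
# Not shown in this snippet: module-level imports (os, sys, argparse, datetime,
# dask.distributed, dask_jobqueue), the module-level ukb path variable, and the
# download_item function. Its signature and its None-on-success /
# error-string-on-failure convention are taken from how main() uses it; the
# fetch_ukb_bulk_file helper below is a hypothetical placeholder, not the
# original implementation:

def download_item(sample_ID, field_ID, vcf_dir):
    """Download one UK Biobank bulk file into vcf_dir.

    Returns None on success, or a string describing the failure so the caller
    can log it without the future itself raising.
    """
    try:
        fetch_ukb_bulk_file(sample_ID, field_ID, dest_dir=vcf_dir)  # hypothetical helper
        return None
    except Exception as err:
        return str(err)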