Example #1
0
def test_integration_pox(prompt=None):
    """Integration check for the "pox" disease. This replays the run
       performed by main_RepeatsNcov.c and asserts that the final
       population of the trajectory equals the reference values taken
       from the original C code.

       The 'prompt' argument is passed straight through to OutputFiles
       when the temporary output directory is created.
    """

    # fixed inputs that drive the run
    rng_seed = 15324
    variables_file = ncovparams_csv
    variables_line = 0
    uv_value = 1.0
    start_population = 57104043
    output_path = os.path.join(script_dir, "test_integration_output")

    # read the parameter set (this needs the MetaWardsData repository)
    try:
        params = Parameters.load(parameters="march29")
    except Exception as e:
        help_message = (
            "Unable to load parameter files. Make sure that you have "
            "cloned the MetaWardsData repository and have set the "
            "environment variable METAWARDSDATA to point to the "
            "local directory containing the repository, e.g. the "
            "default is $HOME/GitHub/MetaWardsData"
        )
        print(help_message)
        raise e

    # select the disease, the model data and the additional seeds
    params.set_disease("pox")
    params.set_input_files("2011Data")
    params.add_seeds("ExtraSeedsBrighton.dat")

    # the adjustable variables come from the requested line of the
    # input file
    variables = params.read_variables(variables_file, variables_line)

    # remaining parameters that are assigned directly
    params.UV = uv_value
    params.static_play_at_home = 0
    params.play_to_work = 0
    params.work_to_play = 0
    params.daily_imports = 0.0

    # starting population and the profiler shared by every stage
    population = Population(initial=start_population)
    profiler = Profiler()

    print("Building the network...")
    network = Network.build(params=params, profiler=profiler)

    # apply the first set of adjustable variables and push the
    # updated parameters into the network
    params = params.set_variables(variables[0])
    network.update(params, profiler=profiler)

    with OutputFiles(output_path, force_empty=True,
                     prompt=prompt) as output_dir:
        print("Run the model...")
        trajectory = network.run(population=population,
                                 seed=rng_seed,
                                 output_dir=output_dir,
                                 nsteps=31,
                                 profiler=profiler,
                                 nthreads=1)

    # clean up the output directory now that the run has finished
    OutputFiles.remove(output_path, prompt=None)

    print("End of the run")

    Console.print_profiler(profiler)

    final_population = trajectory[-1]

    Console.rule("Model output")
    Console.print_population(final_population)

    # Reference population from the original C code. Note that the run
    # above uses nsteps=31 and that this reference is for day 31
    expected = Population(initial=start_population,
                          susceptibles=56080780,
                          latent=374,
                          total=370,
                          recovereds=553,
                          n_inf_wards=289,
                          day=31)

    Console.rule("Expected output")
    Console.print_population(expected)

    assert final_population == expected
Example #2
0
def cli():
    """Main function for the command line interface. This does one of three
       things:

       1. If this is the main process, then it parses the arguments and
          runs and manages the jobs

       2. If this is a worker process, then it starts up and waits for work

       3. If this is a supervisor process, then it queries the job scheduling
          system for information about the compute nodes to use, and will then
          set up and run a manager (main) process that will use those
          nodes to run the jobs

       Returns
       -------
       0 once the main process has finished (including the case where
       run_models produced no output). None is returned when this
       process is an mpi4py worker, or when it has acted as a supervisor.

       Raises
       ------
       ValueError
           If the number of repeats disagrees with the number of variable
           sets, if nothing would be run, if the start day is negative or
           if the start date cannot be interpreted.

       Note that this calls sys.exit(0) after printing the help if none
       of the options needed to run a model were supplied, and
       sys.exit(-1) if no line numbers could be read from the '--line'
       argument.
    """
    from metawards.utils import Console

    # get the parallel scheme now before we import any other modules
    # so that it is clear if mpi4py or scoop (or another parallel module)
    # has been imported via the required "-m module" syntax
    parallel_scheme = get_parallel_scheme()

    if parallel_scheme == "mpi4py":
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        nprocs = comm.Get_size()
        rank = comm.Get_rank()

        if rank != 0:
            # this is a worker process, so should not do anything
            # more until it is given work in the pool. Rank 0 is the
            # main process, so the workers are ranks 1 to nprocs-1
            Console.print(f"Starting worker process {rank} of {nprocs-1}...")
            return
        else:
            Console.print("Starting main process...")

    elif parallel_scheme == "scoop":
        Console.print("STARTING SCOOP PROCESS")

    import sys

    args, parser = parse_args()

    if not args.already_supervised:
        hostfile = get_hostfile(args)
        if hostfile:
            # The user has asked to run a parallel job - this means that this
            # process is the parallel supervisor
            if args.mpi:
                mpi_supervisor(hostfile, args)
                return
            elif args.scoop:
                scoop_supervisor(hostfile, args)
                return

            # neither is preferred - if scoop is installed then use that
            try:
                import scoop  # noqa - disable unused warning
                have_scoop = True
            except Exception:
                have_scoop = False

            if have_scoop:
                scoop_supervisor(hostfile, args)
                return

            # do we have MPI?
            try:
                import mpi4py  # noqa - disable unused warning
                have_mpi4py = True
            except Exception:
                have_mpi4py = False

            if have_mpi4py:
                mpi_supervisor(hostfile, args)
                return

            # we don't have any other option, just keep going and
            # use multiprocessing - in this case we don't need a
            # supervisor and this is the main process

    # This is now the code for the main process

    # WE NEED ONE OF these listed options;
    should_run = any(arg is not None for arg in (
        args.input, args.repeats, args.disease, args.additional,
        args.model, args.iterator, args.extractor, args.demographics,
        args.mixer, args.mover
    ))

    if not should_run:
        parser.print_help(sys.stdout)
        sys.exit(0)

    if args.repeats is None:
        args.repeats = [1]

    # import the parameters here to speed up the display of help
    from metawards import Parameters, Network, Population, print_version_string

    # print the version information first, so that there is enough
    # information to enable someone to reproduce this run
    print_version_string()

    Console.rule("Initialise")

    # VariableSet is also needed later to read the user-defined
    # custom parameters
    from metawards import VariableSets, VariableSet

    if args.input:
        # get the line numbers of the input file to read
        if not args.line:
            linenums = None
            Console.print(f"* Using parameters from all lines of {args.input}",
                          markdown=True)
        else:
            from metawards.utils import string_to_ints
            linenums = string_to_ints(args.line)

            if len(linenums) == 0:
                Console.error(f"You cannot read no lines from {args.input}?")
                sys.exit(-1)
            elif len(linenums) == 1:
                Console.print(
                    f"* Using parameters from line {linenums[0]} of "
                    f"{args.input}",
                    markdown=True)
            else:
                Console.print(
                    f"* Using parameters from lines {linenums} of "
                    f"{args.input}",
                    markdown=True)

        variables = VariableSets.read(filename=args.input,
                                      line_numbers=linenums)
    else:
        # create a VariableSets with one null VariableSet
        variables = VariableSets()
        variables.append(VariableSet())

    nrepeats = args.repeats

    if not nrepeats:
        nrepeats = [1]

    if len(nrepeats) > 1 and len(variables) != len(nrepeats):
        Console.error(f"The number of repeats {len(nrepeats)} must equal the "
                      f"number of adjustable variable lines {len(variables)}")
        raise ValueError("Disagreement in the number of repeats and "
                         "adjustable variables")

    # ensure that all repeats are >= 0
    nrepeats = [max(0, int(x)) for x in nrepeats]

    if sum(nrepeats) == 0:
        Console.error("The total number of repeats is 0. Are you "
                      "sure that you don't want to run anything?")
        raise ValueError("Cannot run nothing")

    if len(nrepeats) == 1 and nrepeats[0] == 1:
        Console.print("* Performing a single run of each set of parameters",
                      markdown=True)
    elif len(nrepeats) == 1:
        Console.print(
            f"* Performing {nrepeats[0]} runs of each set of parameters",
            markdown=True)
    else:
        Console.print(
            f"* Performing {nrepeats} runs applied to the parameters",
            markdown=True)

    variables = variables.repeat(nrepeats)

    # working out the number of processes and threads...
    from metawards.utils import guess_num_threads_and_procs
    (nthreads,
     nprocs) = guess_num_threads_and_procs(njobs=len(variables),
                                           nthreads=args.nthreads,
                                           nprocs=args.nprocs,
                                           parallel_scheme=parallel_scheme)

    Console.print(
        f"\n* Number of threads to use for each model run is {nthreads}",
        markdown=True)

    if nprocs > 1:
        Console.print(
            f"* Number of processes used to parallelise model "
            f"runs is {nprocs}",
            markdown=True)
        Console.print(
            f"* Parallelisation will be achieved using {parallel_scheme}",
            markdown=True)

    # sort out the random number seed
    seed = args.seed

    if seed is None:
        import random
        seed = random.randint(10000, 99999999)

    if seed == 0:
        # this is a special mode that a developer can use to force
        # all jobs to use the same random number seed (15324) that
        # is used for comparing outputs. This should NEVER be used
        # for production code
        Console.warning("Using special mode to fix all random number "
                        "seeds to 15324. DO NOT USE IN PRODUCTION!!!")
    else:
        Console.print(f"* Using random number seed {seed}", markdown=True)

    # get the starting day and date
    start_day = args.start_day

    if start_day < 0:
        raise ValueError(f"You cannot use a start day {start_day} that is "
                         f"less than zero!")

    start_date = None

    if args.start_date:
        try:
            from dateparser import parse
            start_date = parse(args.start_date).date()
        except Exception:
            # best effort only - any failure here (including a failed
            # import or an unparsable date) falls through to the
            # ISO-format parsing below
            pass

        if start_date is None:
            from datetime import date
            try:
                start_date = date.fromisoformat(args.start_date)
            except Exception as e:
                raise ValueError(f"Cannot interpret a valid date from "
                                 f"'{args.start_date}'. Error is "
                                 f"{e.__class__} {e}") from e

    if start_date is None:
        from datetime import date
        start_date = date.today()

    Console.print(f"* Day zero is {start_date.strftime('%A %B %d %Y')}",
                  markdown=True)

    if start_day != 0:
        from datetime import timedelta
        start_day_date = start_date + timedelta(days=start_day)
        Console.print(f"Starting on day {start_day}, which is "
                      f"{start_day_date.strftime('%A %B %d %Y')}")
    else:
        start_day_date = start_date

    # now find the MetaWardsData repository as this will be needed
    # for the repeat command line too
    (repository,
     repository_version) = Parameters.get_repository(args.repository)

    Console.print(f"* Using MetaWardsData at {repository}", markdown=True)

    if repository_version["is_dirty"]:
        Console.warning("This repository is dirty, meaning that the data "
                        "has not been committed to git. This may make "
                        "this calculation very difficult to reproduce")

    # now work out the minimum command line needed to repeat this job.
    # The resolved values are written back into args so that both the
    # repeat command and config.yaml record what was actually used
    args.seed = seed
    args.nprocs = nprocs
    args.nthreads = nthreads
    args.start_date = start_date.isoformat()
    args.repository = repository

    # also print the source of all inputs
    import configargparse
    Console.rule("Source of inputs")
    p = configargparse.get_argument_parser("main")
    Console.print(p.format_values())

    # print out the command used to repeat this job. Values are passed
    # through shlex.quote so that anything containing spaces or other
    # shell-special characters is correctly quoted
    import shlex
    repeat_parts = ["metawards"]

    for key, value in vars(args).items():
        if value is None:
            continue

        k = key.replace("_", "-")

        if isinstance(value, bool):
            # flags are only written when they are switched on
            if value:
                repeat_parts.append(f"--{k}")
        elif isinstance(value, list):
            repeat_parts.append(f"--{k}")
            repeat_parts.extend(shlex.quote(str(val)) for val in value)
        else:
            repeat_parts.append(f"--{k}")
            repeat_parts.append(shlex.quote(str(value)))

    repeat_cmd = " ".join(repeat_parts)

    Console.rule("Repeating this run")
    Console.print("To repeat this job use the command;")
    Console.command(repeat_cmd)
    Console.print("Or alternatively use the config.yaml file that will be "
                  "written to the output directory and use the command;")
    Console.command("metawards -c config.yaml")

    # load all of the parameters
    try:
        params = Parameters.load(parameters=args.parameters)
    except Exception:
        Console.warning(
            "Unable to load parameter files. Make sure that you have "
            "cloned the MetaWardsData repository and have set the "
            "environment variable METAWARDSDATA to point to the "
            "local directory containing the repository, e.g. the "
            "default is $HOME/GitHub/MetaWardsData")
        raise

    # should we profile the code? (default no as it prints a lot).
    # Note that 'no_profile' takes precedence over 'profile'
    profiler = None

    if args.profile and not args.no_profile:
        from metawards.utils import Profiler
        profiler = Profiler()

    # load the disease and starting-point input files
    Console.rule("Disease")
    if args.disease:
        params.set_disease(args.disease)
    else:
        params.set_disease("ncov")

    Console.rule("Model data")
    if args.model:
        params.set_input_files(args.model)
    else:
        params.set_input_files("2011Data")

    # load the user-defined custom parameters
    Console.rule("Custom parameters and seeds")
    if args.user_variables:
        custom = VariableSet.read(args.user_variables)
        Console.print(f"Adjusting variables to {custom}")
        custom.adjust(params)
    else:
        Console.print("Not adjusting any parameters...")

    # read the additional seeds
    if not args.additional:
        Console.print("Not using any additional seeds...")
    else:
        for additional in args.additional:
            Console.print(f"Loading additional seeds from {additional}")
            params.add_seeds(additional)

    # what to do with the 0 state?
    if args.disable_star:
        Console.print("Disabling the * state. Stage 0 is the one and "
                      "only E state.")
        stage_0 = "disable"
    elif args.star_is_E:
        Console.print("Setting the * state as an additional E state.")
        stage_0 = "E"
    else:
        Console.print("Setting the * state as an additional R state.")
        stage_0 = "R"

    params.stage_0 = stage_0

    # extra parameters that are set
    params.UV = args.UV

    # set these extra parameters to 0
    params.static_play_at_home = 0
    params.play_to_work = 0
    params.work_to_play = 0
    params.daily_imports = 0.0

    Console.rule("Parameters")
    Console.print(params, markdown=True)

    # the size of the starting population
    population = Population(initial=args.population,
                            date=start_day_date,
                            day=start_day)

    Console.rule("Building the network")
    network = Network.build(params=params,
                            population=population,
                            max_nodes=args.max_nodes,
                            max_links=args.max_links,
                            profiler=profiler)

    if args.demographics:
        from metawards import Demographics
        Console.rule("Specialising into demographics")
        demographics = Demographics.load(args.demographics)
        Console.print(demographics)

        network = network.specialise(demographics,
                                     profiler=profiler,
                                     nthreads=nthreads)

    Console.rule("Preparing to run")
    from metawards import OutputFiles
    from metawards.utils import run_models

    outdir = args.output

    if outdir is None:
        outdir = "output"

    if args.force_overwrite_output:
        prompt = None
    else:
        from metawards import input

        def prompt(x):
            return input(x, default="y")

    # automatic bzip is on by default, and is only switched off if
    # 'no_auto_bzip' is set without 'auto_bzip'
    auto_bzip = True

    if args.auto_bzip:
        auto_bzip = True
    elif args.no_auto_bzip:
        auto_bzip = False

    # any unset (falsy) option is passed on as None
    iterator = args.iterator or None
    extractor = args.extractor or None
    mixer = args.mixer or None
    mover = args.mover or None

    with OutputFiles(outdir,
                     force_empty=args.force_overwrite_output,
                     auto_bzip=auto_bzip,
                     prompt=prompt) as output_dir:
        Console.rule("Running the model")
        CONSOLE = output_dir.open("console.log")
        Console.save(CONSOLE)

        # write the config file for this job to output/config.yaml.
        # The values are aligned using the width of the longest key
        # (replacing "_" with "-" does not change a key's length)
        max_keysize = max((len(key) for key in vars(args)), default=0)

        lines = []

        for key, value in vars(args).items():
            if value is None:
                continue

            key = key.replace("_", "-")
            spaces = " " * (max_keysize - len(key))

            if isinstance(value, bool):
                if value:
                    lines.append(f"{key}:{spaces} true")
                else:
                    lines.append(f"{key}:{spaces} false")
            elif isinstance(value, list):
                s_value = [str(x) for x in value]
                lines.append(f"{key}:{spaces} [ {', '.join(s_value)} ]")
            else:
                lines.append(f"{key}:{spaces} {value}")

        CONFIG = output_dir.open("config.yaml", auto_bzip=False)
        lines.sort(key=str.swapcase)
        CONFIG.write("\n".join(lines))
        CONFIG.write("\n")
        CONFIG.flush()
        CONFIG.close()

        result = run_models(network=network,
                            variables=variables,
                            population=population,
                            nprocs=nprocs,
                            nthreads=nthreads,
                            seed=seed,
                            nsteps=args.nsteps,
                            output_dir=output_dir,
                            iterator=iterator,
                            extractor=extractor,
                            mixer=mixer,
                            mover=mover,
                            profiler=profiler,
                            parallel_scheme=parallel_scheme)

        if result is None or len(result) == 0:
            Console.print("No output - end of run")
            return 0

        Console.rule("End of the run", style="finish")

        Console.save(CONSOLE)

    return 0