Ejemplo n.º 1
0
def test_parameterset():
    """Check construction, lookup and fingerprinting of single
       VariableSet objects, then check that collecting them into a
       VariableSets and repeating it gives the expected ordering
       and repeat indexes
    """
    first = VariableSet(variables=l0)

    # the repeat index defaults to 1 when it is not supplied
    assert first.repeat_index() == 1

    for name in l0:
        expected = l0[name]
        assert name in first.variable_names()
        assert expected in first.variable_values()
        assert first[name] == expected

    # the second positional argument is the repeat index
    second = VariableSet(l1, 2)

    assert second.repeat_index() == 2

    for name in l1:
        expected = l1[name]
        assert name in second.variable_names()
        assert expected in second.variable_values()
        assert second[name] == expected

    # fingerprints differ between the sets, and including the
    # repeat index changes the fingerprint of a set
    assert first.fingerprint() != second.fingerprint()
    assert first.fingerprint() != first.fingerprint(include_index=True)
    assert second.fingerprint() != second.fingerprint(include_index=True)

    variables = VariableSets()
    assert len(variables) == 0

    variables.append(first)
    variables.append(second)

    assert len(variables) == 2
    assert variables[0] == first
    assert variables[1] == second

    variables = variables.repeat(5)

    # two sets repeated five times
    assert len(variables) == 10

    for repeat in range(5):
        # the repeated sets are interleaved: l0, l1, l0, l1, ...
        even = 2 * repeat
        odd = even + 1

        print(f"{even} : {variables[even]} vs {l0}")
        print(f"{odd} : {variables[odd]} vs {l1}")

        assert variables[even].variables() == l0
        assert variables[odd].variables() == l1
        assert variables[even].fingerprint() == first.fingerprint()
        assert variables[odd].fingerprint() == second.fingerprint()

        # both sets are expected to count their repeats from 1
        assert variables[even].repeat_index() == repeat + 1
        assert variables[odd].repeat_index() == repeat + 1
Ejemplo n.º 2
0
def test_variableset():
    """Check VariableSets.repeat() when given a single repeat count,
       one repeat count per set, and a list of counts whose length does
       not match the number of sets (which must raise a ValueError)
    """
    variables = VariableSets.read(ncovparams_csv)

    assert len(variables) == 2

    for item in variables:
        assert item.repeat_index() == 1

    def check_repeated(repeated, size, tail):
        # The first two entries must equal the original sets. 'tail'
        # lists, for each entry from index 2 onwards, which original
        # set it must match and the repeat index it must report
        assert len(repeated) == size

        assert repeated[0] == variables[0]
        assert repeated[1] == variables[1]

        for offset, (source, repeat) in enumerate(tail, start=2):
            assert repeated[offset].variables() == \
                variables[source].variables()
            assert repeated[offset].repeat_index() == repeat

    # the same number of repeats for every set
    v2 = variables.repeat(2)
    check_repeated(v2, 4, [(0, 2), (1, 2)])

    # the second set is repeated more often than the first
    v3 = variables.repeat([2, 4])
    check_repeated(v3, 6, [(0, 2), (1, 2), (1, 3), (1, 4)])

    # the first set is repeated more often than the second
    v4 = variables.repeat([4, 2])
    check_repeated(v4, 6, [(0, 2), (1, 2), (0, 3), (0, 4)])

    # three repeat counts cannot be matched to two sets
    with pytest.raises(ValueError):
        variables.repeat([2, 4, 6])

    for item in v4:
        indexed_fingerprint = item.fingerprint(include_index=True)
        assert item.output_dir() == indexed_fingerprint
Ejemplo n.º 3
0
def test_variables_compliance():
    """Check that the compliance data file is read as eleven sets that
       each hold only the '.compliance' variable, with the values
       matching the expected list in order
    """
    variables = VariableSets.read(compliance_dat)

    assert len(variables) == 11

    expected = [1.0, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]

    for varset, compliance in zip(variables, expected):
        size = len(varset)
        found = varset[".compliance"]

        print(varset)
        print(size)
        print(found, compliance)

        assert size == 1
        assert found == compliance
Ejemplo n.º 4
0
def test_variables_with_repeats():
    """Check that, for every set read from the 'params with repeats'
       file, the output directory is built from the beta[2] and
       too_ill_to_move[2] values rather than from the fingerprint
    """
    from metawards.utils import Profiler

    # time the read and the checking loop separately
    p = Profiler().start("read")
    variables = VariableSets.read(params_with_repeats_csv)
    p = p.stop()

    p = p.start("loop")
    for variable in variables:
        outdir = variable.output_dir()

        assert outdir != variable.fingerprint(include_index=True)

        # the expected name has every "." replaced by "i"
        expected = "beta_%.1f_ill_%.2f" % (variable["beta[2]"],
                                           variable["too_ill_to_move[2]"])
        expected = expected.replace(".", "i")

        print(outdir, expected)

        assert outdir == expected
    p = p.stop()
    print(p)
Ejemplo n.º 5
0
def test_variable_pathway():
    """Check that applying a variable set to the parameters of a network
       specialised into demographics gives the expected beta values for
       the overall network and for each demographic, both on the returned
       parameters and on the network after it has been updated
    """
    data_dir = os.path.join(script_dir, "data")

    demographics = Demographics.load(
        os.path.join(data_dir, "red_one_blue.json"))
    variables = VariableSets.read(
        os.path.join(data_dir, "demographic_scan.csv"))

    print(variables[0].fingerprint(include_index=True))

    params = Parameters.load()
    params.set_disease("lurgy")
    params.set_input_files("single")

    network = Network.build(params).specialise(demographics)

    params = network.params.set_variables(variables[0])

    # expected beta values after the variables have been applied
    overall_beta = [0.0, 0.0, 0.1, 0.2, 0.0]
    red_one_beta = [0.0, 0.0, 0.1, 0.5, 0.27]
    blue_beta = [0.0, 0.0, 0.1, 0.2, 0.25, 0.0]

    assert params.disease_params.beta == overall_beta
    assert params["overall"].disease_params.beta == overall_beta
    assert params["red one"].disease_params.beta == red_one_beta
    print(params["blue"].disease_params.beta)
    assert params["blue"].disease_params.beta == blue_beta

    network.update(params)

    # the same values must now be held by the network itself...
    disease = network.params.disease_params
    print(disease.beta)
    assert disease.beta == overall_beta

    # ...and by the first two sub-networks, in this order
    for index, expected in enumerate((red_one_beta, blue_beta)):
        disease = network.subnets[index].params.disease_params
        print(disease.beta)
        assert disease.beta == expected
Ejemplo n.º 6
0
def cli():
    """Main function for the command line interface. This does one of three
       things:

       1. If this is the main process, then it parses the arguments and
          runs and manages the jobs

       2. If this is a worker process, then it starts up and waits for work

       3. If this is a supervisor process, then it queries the job scheduling
          system for information about the compute nodes to use, and will
          then set up and run a manager (main) process that will use those
          nodes to run the jobs

       Returns
       -------
       0 once the main process has run the models. None is returned if
       this is an mpi4py worker process, or if the work was handed to
       a supervisor

       Raises
       ------
       ValueError
           If the number of repeats disagrees with the number of lines of
           adjustable variables, if the total number of repeats is zero,
           if the start day is negative, or if the start date cannot
           be interpreted
       SystemExit
           If none of the options needed to run a job are supplied
           (the help is printed), or if no line numbers could be
           extracted from the requested lines of the input file
    """
    from metawards.utils import Console

    # get the parallel scheme now before we import any other modules
    # so that it is clear if mpi4py or scoop (or another parallel module)
    # has been imported via the required "-m module" syntax
    parallel_scheme = get_parallel_scheme()

    if parallel_scheme == "mpi4py":
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        nprocs = comm.Get_size()
        rank = comm.Get_rank()

        if rank != 0:
            # this is a worker process, so should not do anything
            # more until it is given work in the pool. Rank 0 is the
            # main process, so the workers are ranks 1 to nprocs-1
            Console.print(f"Starting worker process {rank} of {nprocs-1}...")
            return
        else:
            Console.print("Starting main process...")

    elif parallel_scheme == "scoop":
        Console.print("STARTING SCOOP PROCESS")

    import shlex
    import sys

    args, parser = parse_args()

    if not args.already_supervised:
        hostfile = get_hostfile(args)
        if hostfile:
            # The user has asked to run a parallel job - this means that this
            # process is the parallel supervisor
            if args.mpi:
                mpi_supervisor(hostfile, args)
                return
            elif args.scoop:
                scoop_supervisor(hostfile, args)
                return

            # neither is preferred - if scoop is installed then use that
            try:
                import scoop  # noqa - disable unused warning
                have_scoop = True
            except Exception:
                have_scoop = False

            if have_scoop:
                scoop_supervisor(hostfile, args)
                return

            # do we have MPI?
            try:
                import mpi4py  # noqa - disable unused warning
                have_mpi4py = True
            except Exception:
                have_mpi4py = False

            if have_mpi4py:
                mpi_supervisor(hostfile, args)
                return

            # we don't have any other option, just keep going and
            # use multiprocessing - in this case we don't need a
            # supervisor and this is the main process

    # This is now the code for the main process

    # we need at least one of these options to have something to run
    should_run = any(arg is not None for arg in (
        args.input, args.repeats, args.disease, args.additional,
        args.model, args.iterator, args.extractor, args.demographics,
        args.mixer, args.mover))

    if not should_run:
        parser.print_help(sys.stdout)
        sys.exit(0)

    if args.repeats is None:
        args.repeats = [1]

    # import the parameters here to speed up the display of help
    from metawards import Parameters, Network, Population, \
        print_version_string, VariableSets, VariableSet

    # print the version information first, so that there is enough
    # information to enable someone to reproduce this run
    print_version_string()

    Console.rule("Initialise")

    if args.input:
        # get the line numbers of the input file to read
        if args.line is None or len(args.line) == 0:
            linenums = None
            Console.print(f"* Using parameters from all lines of {args.input}",
                          markdown=True)
        else:
            from metawards.utils import string_to_ints
            linenums = string_to_ints(args.line)

            if len(linenums) == 0:
                Console.error(f"You cannot read no lines from {args.input}?")
                sys.exit(-1)
            elif len(linenums) == 1:
                Console.print(
                    f"* Using parameters from line {linenums[0]} of "
                    f"{args.input}",
                    markdown=True)
            else:
                Console.print(
                    f"* Using parameters from lines {linenums} of "
                    f"{args.input}",
                    markdown=True)

        variables = VariableSets.read(filename=args.input,
                                      line_numbers=linenums)
    else:
        # create a VariableSets with one null VariableSet
        variables = VariableSets()
        variables.append(VariableSet())

    nrepeats = args.repeats

    if nrepeats is None or len(nrepeats) < 1:
        nrepeats = [1]

    if len(nrepeats) > 1 and len(variables) != len(nrepeats):
        Console.error(f"The number of repeats {len(nrepeats)} must equal the "
                      f"number of adjustable variable lines {len(variables)}")
        raise ValueError("Disagreement in the number of repeats and "
                         "adjustable variables")

    # ensure that all repeats are >= 0
    nrepeats = [max(0, int(x)) for x in nrepeats]

    if sum(nrepeats) == 0:
        Console.error("The total number of repeats is 0. Are you "
                      "sure that you don't want to run anything?")
        raise ValueError("Cannot run nothing")

    if len(nrepeats) == 1 and nrepeats[0] == 1:
        Console.print("* Performing a single run of each set of parameters",
                      markdown=True)
    elif len(nrepeats) == 1:
        Console.print(
            f"* Performing {nrepeats[0]} runs of each set of parameters",
            markdown=True)
    else:
        Console.print(
            f"* Performing {nrepeats} runs applied to the parameters",
            markdown=True)

    variables = variables.repeat(nrepeats)

    # working out the number of processes and threads...
    from metawards.utils import guess_num_threads_and_procs
    (nthreads,
     nprocs) = guess_num_threads_and_procs(njobs=len(variables),
                                           nthreads=args.nthreads,
                                           nprocs=args.nprocs,
                                           parallel_scheme=parallel_scheme)

    Console.print(
        f"\n* Number of threads to use for each model run is {nthreads}",
        markdown=True)

    if nprocs > 1:
        Console.print(
            f"* Number of processes used to parallelise model "
            f"runs is {nprocs}",
            markdown=True)
        Console.print(
            f"* Parallelisation will be achieved using {parallel_scheme}",
            markdown=True)

    # sort out the random number seed
    seed = args.seed

    if seed is None:
        import random
        seed = random.randint(10000, 99999999)

    if seed == 0:
        # this is a special mode that a developer can use to force
        # all jobs to use the same random number seed (15324) that
        # is used for comparing outputs. This should NEVER be used
        # for production code
        Console.warning("Using special mode to fix all random number "
                        "seeds to 15324. DO NOT USE IN PRODUCTION!!!")
    else:
        Console.print(f"* Using random number seed {seed}", markdown=True)

    # get the starting day and date
    start_day = args.start_day

    if start_day < 0:
        raise ValueError(f"You cannot use a start day {start_day} that is "
                         f"less than zero!")

    from datetime import date, timedelta

    start_date = None

    if args.start_date:
        # try the flexible parser first. This is deliberately
        # best-effort (dateparser may be missing, or may fail to parse
        # the string) because we fall back to ISO format parsing below
        try:
            from dateparser import parse
            start_date = parse(args.start_date).date()
        except Exception:
            pass

        if start_date is None:
            try:
                start_date = date.fromisoformat(args.start_date)
            except Exception as e:
                raise ValueError(f"Cannot interpret a valid date from "
                                 f"'{args.start_date}'. Error is "
                                 f"{e.__class__} {e}") from e

    if start_date is None:
        start_date = date.today()

    Console.print(f"* Day zero is {start_date.strftime('%A %B %d %Y')}",
                  markdown=True)

    if start_day != 0:
        start_day_date = start_date + timedelta(days=start_day)
        Console.print(f"Starting on day {start_day}, which is "
                      f"{start_day_date.strftime('%A %B %d %Y')}")
    else:
        start_day_date = start_date

    # now find the MetaWardsData repository as this will be needed
    # for the repeat command line too
    (repository,
     repository_version) = Parameters.get_repository(args.repository)

    Console.print(f"* Using MetaWardsData at {repository}", markdown=True)

    if repository_version["is_dirty"]:
        Console.warning("This repository is dirty, meaning that the data "
                        "has not been committed to git. This may make "
                        "this calculation very difficult to reproduce")

    # now work out the minimum command line needed to repeat this job.
    # The values that were guessed or generated above are written back
    # to args so that they appear in the repeat command and config file
    args.seed = seed
    args.nprocs = nprocs
    args.nthreads = nthreads
    args.start_date = start_date.isoformat()
    args.repository = repository

    # also print the source of all inputs
    import configargparse
    Console.rule("Source of inputs")
    p = configargparse.get_argument_parser("main")
    Console.print(p.format_values())

    # print out the command used to repeat this job. Values are passed
    # through shlex.quote so that anything containing spaces (or other
    # characters that are special to the shell) is correctly quoted
    repeat_parts = ["metawards"]

    for key, value in vars(args).items():
        if value is None:
            continue

        flag = f"--{key.replace('_', '-')}"

        if isinstance(value, bool):
            # boolean options are flags that are only given when true
            if value:
                repeat_parts.append(flag)
        elif isinstance(value, list):
            repeat_parts.append(flag)
            repeat_parts.extend(shlex.quote(str(val)) for val in value)
        else:
            repeat_parts.append(flag)
            repeat_parts.append(shlex.quote(str(value)))

    repeat_cmd = " ".join(repeat_parts)

    Console.rule("Repeating this run")
    Console.print("To repeat this job use the command;")
    Console.command(repeat_cmd)
    Console.print("Or alternatively use the config.yaml file that will be "
                  "written to the output directory and use the command;")
    Console.command("metawards -c config.yaml")

    # load all of the parameters
    try:
        params = Parameters.load(parameters=args.parameters)
    except Exception:
        Console.warning(
            f"Unable to load parameter files. Make sure that you have "
            f"cloned the MetaWardsData repository and have set the "
            f"environment variable METAWARDSDATA to point to the "
            f"local directory containing the repository, e.g. the "
            f"default is $HOME/GitHub/MetaWardsData")
        raise

    # should we profile the code? (default no as it prints a lot).
    # Asking for no profiling takes priority over asking for profiling
    profiler = None

    if args.profile and not args.no_profile:
        from metawards.utils import Profiler
        profiler = Profiler()

    # load the disease and starting-point input files
    Console.rule("Disease")
    if args.disease:
        params.set_disease(args.disease)
    else:
        params.set_disease("ncov")

    Console.rule("Model data")
    if args.model:
        params.set_input_files(args.model)
    else:
        params.set_input_files("2011Data")

    # load the user-defined custom parameters
    Console.rule("Custom parameters and seeds")
    if args.user_variables:
        custom = VariableSet.read(args.user_variables)
        Console.print(f"Adjusting variables to {custom}")
        custom.adjust(params)
    else:
        Console.print("Not adjusting any parameters...")

    # read the additional seeds
    if args.additional is None or len(args.additional) == 0:
        Console.print("Not using any additional seeds...")
    else:
        for additional in args.additional:
            Console.print(f"Loading additional seeds from {additional}")
            params.add_seeds(additional)

    # what to do with the 0 state?
    if args.disable_star:
        Console.print("Disabling the * state. Stage 0 is the one and "
                      "only E state.")
        stage_0 = "disable"
    elif args.star_is_E:
        Console.print("Setting the * state as an additional E state.")
        stage_0 = "E"
    else:
        Console.print("Setting the * state as an additional R state.")
        stage_0 = "R"

    params.stage_0 = stage_0

    # extra parameters that are set
    params.UV = args.UV

    # set these extra parameters to 0
    params.static_play_at_home = 0
    params.play_to_work = 0
    params.work_to_play = 0
    params.daily_imports = 0.0

    Console.rule("Parameters")
    Console.print(params, markdown=True)

    # the size of the starting population
    population = Population(initial=args.population,
                            date=start_day_date,
                            day=start_day)

    Console.rule("Building the network")
    network = Network.build(params=params,
                            population=population,
                            max_nodes=args.max_nodes,
                            max_links=args.max_links,
                            profiler=profiler)

    if args.demographics:
        from metawards import Demographics
        Console.rule("Specialising into demographics")
        demographics = Demographics.load(args.demographics)
        Console.print(demographics)

        network = network.specialise(demographics,
                                     profiler=profiler,
                                     nthreads=nthreads)

    Console.rule("Preparing to run")
    from metawards import OutputFiles
    from metawards.utils import run_models

    outdir = args.output

    if outdir is None:
        outdir = "output"

    if args.force_overwrite_output:
        prompt = None
    else:
        # import under a different name so that we do not shadow
        # the builtin input function
        from metawards import input as _metawards_input

        def prompt(x):
            return _metawards_input(x, default="y")

    # compress the output unless this has been explicitly disabled
    # (asking for compression wins if both options are given)
    auto_bzip = True if args.auto_bzip else not args.no_auto_bzip

    # normalise any unset (or empty) custom functions to None
    iterator = args.iterator or None
    extractor = args.extractor or None
    mixer = args.mixer or None
    mover = args.mover or None

    with OutputFiles(outdir,
                     force_empty=args.force_overwrite_output,
                     auto_bzip=auto_bzip,
                     prompt=prompt) as output_dir:
        Console.rule("Running the model")
        CONSOLE = output_dir.open("console.log")
        Console.save(CONSOLE)

        # write the config file for this job to output/config.yaml,
        # padding the keys so that the values line up in a column
        max_keysize = max((len(key) for key in vars(args)), default=0)

        lines = []

        for key, value in vars(args).items():
            if value is None:
                continue

            key = key.replace("_", "-")
            spaces = " " * (max_keysize - len(key))

            if isinstance(value, bool):
                if value:
                    lines.append(f"{key}:{spaces} true")
                else:
                    lines.append(f"{key}:{spaces} false")
            elif isinstance(value, list):
                s_value = [str(x) for x in value]
                lines.append(f"{key}:{spaces} [ {', '.join(s_value)} ]")
            else:
                lines.append(f"{key}:{spaces} {value}")

        CONFIG = output_dir.open("config.yaml", auto_bzip=False)
        lines.sort(key=str.swapcase)
        CONFIG.write("\n".join(lines))
        CONFIG.write("\n")
        CONFIG.flush()
        CONFIG.close()

        result = run_models(network=network,
                            variables=variables,
                            population=population,
                            nprocs=nprocs,
                            nthreads=nthreads,
                            seed=seed,
                            nsteps=args.nsteps,
                            output_dir=output_dir,
                            iterator=iterator,
                            extractor=extractor,
                            mixer=mixer,
                            mover=mover,
                            profiler=profiler,
                            parallel_scheme=parallel_scheme)

        if result is None or len(result) == 0:
            Console.print("No output - end of run")
            return 0

        Console.rule("End of the run", style="finish")

        Console.save(CONSOLE)

    return 0
Ejemplo n.º 7
0
def test_demographics_reset(prompt=None, nthreads=1, force_multi=False):
    """Run the same model several times in a row, with a mixer and
       with the network specialised into demographics, and check that
       every run gives the same trajectory. This shows that the network
       is correctly reset between runs. If multiprocessing can be used
       then the same model is also run three times in parallel, and
       those results must agree with each other and with the serial runs
    """
    import random

    # user input parameters
    seed = random.randint(100000, 1000000)
    inputfile = ncovparams_csv
    line_num = 0
    UV = 0.0

    # load all of the parameters
    try:
        params = Parameters.load(parameters="march29")
    except Exception as e:
        print(f"Unable to load parameter files. Make sure that you have "
              f"cloned the MetaWardsData repository and have set the "
              f"environment variable METAWARDSDATA to point to the "
              f"local directory containing the repository, e.g. the "
              f"default is $HOME/GitHub/MetaWardsData")
        raise e

    # load the disease and starting-point input files
    params.set_disease(os.path.join(script_dir, "data", "ncov.json"))
    params.set_input_files("2011Data")
    params.add_seeds("ExtraSeedsBrighton.dat")

    # start from the parameters in the specified line number of the
    # provided input file
    variables = params.read_variables(inputfile, line_num)

    # extra parameters that are set
    params.UV = UV
    params.static_play_at_home = 0
    params.play_to_work = 0
    params.work_to_play = 0
    params.daily_imports = 0.0

    # the size of the starting population
    population = Population(initial=57104043)

    profiler = Profiler()
    nsteps = 20

    demographics = Demographics.load(redblue_json)

    print("Building the network...")
    network = Network.build(params=params, profiler=profiler)
    network = network.specialise(demographics, nthreads=2, profiler=profiler)

    outdir = os.path.join(script_dir, "test_integration_output")

    if can_run_multiprocessing(force_multi):
        print("Running parallel...")

        # run three repeats of the first set of variables
        variable = variables[0]
        variables = VariableSets()
        variables.append(variable)
        variables = variables.repeat(3)

        params = params.set_variables(variables[0])
        network.update(params, profiler=profiler)

        with OutputFiles(outdir, force_empty=True,
                         prompt=prompt) as output_dir:
            results = run_models(network=network,
                                 mixer=mix_shield,
                                 output_dir=output_dir,
                                 variables=variables,
                                 population=population,
                                 nsteps=nsteps,
                                 nthreads=nthreads,
                                 nprocs=2,
                                 seed=seed,
                                 debug_seeds=True)

        OutputFiles.remove(outdir, prompt=None)

        assert len(results) == 3

        print(f"Result 1\n{results[0][1][-1]}")
        print(f"Result 2\n{results[1][1][-1]}")
        print(f"Result 3\n{results[2][1][-1]}")

        # all of the parallel repeats must give the same result
        assert results[0][1] == results[1][1]
        assert results[0][1] == results[2][1]

    # now run the same model three times in serial
    trajectories = []

    for banner in ("Running model 1...",
                   "Running model 2...",
                   "Running model 3..."):
        print(banner)

        # this should reset the network
        network.update(params, profiler=profiler)

        with OutputFiles(outdir, force_empty=True,
                         prompt=prompt) as output_dir:
            trajectories.append(network.run(population=population,
                                            seed=seed,
                                            output_dir=output_dir,
                                            nsteps=nsteps,
                                            profiler=None,
                                            mixer=mix_shield,
                                            nthreads=nthreads))

        OutputFiles.remove(outdir, prompt=None)

    print("End of the run")

    print(profiler)

    print(f"Model 1 output: {trajectories[0]}")
    print(f"Model 2 output: {trajectories[1]}")
    print(f"Model 3 output: {trajectories[2]}")

    assert trajectories[0] == trajectories[1]
    assert trajectories[0] == trajectories[2]

    if can_run_multiprocessing(force_multi):
        # this should also be the same result as the multiprocessing run
        assert trajectories[0] == results[0][1]
Ejemplo n.º 8
0
    'beta[2]': 0.95,
    'beta[3]': 0.95,
    'progress[1]': 0.19,
    'progress[2]': 0.91,
    'progress[3]': 0.91
}

# second dictionary of adjustable variables, using the same keys
# as l1's companion dictionary l0 but with different values
l1 = {
    "beta[2]": 0.90,
    "beta[3]": 0.93,
    "progress[1]": 0.18,
    "progress[2]": 0.92,
    "progress[3]": 0.90,
}

# a collection holding only l0
vars0 = VariableSets()
vars0.append(l0)

# a collection holding only l1
vars1 = VariableSets()
vars1.append(l1)

# a collection holding l0 followed by l1
vars01 = VariableSets()
vars01.append(l0)
vars01.append(l1)


def test_variableset():
    v1 = VariableSet()
    v2 = VariableSet()

    assert len(v1) == 0