Example #1
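This example runs a full Pyro experiment: it prepares either the seal dataset or synthetic data, builds a Model/Guide pair, fits the parameters with a torch optimizer, and reports per-step losses together with an AIC score.
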
def run_expt(args):

    optim = args["optim"]
    lr = args["learnrate"]
    schedule = [] if not args["schedule"] else [
        int(i) for i in args["schedule"].split(",")
    ]
    # argparse leaves unset options as None; normalize them to the string "none"
    args["group"] = "none" if args["group"] is None else args["group"]
    args["individual"] = ("none" if args["individual"] is None
                          else args["individual"])
    random_effects = {"group": args["group"], "individual": args["individual"]}

    pyro.enable_validation(args["validation"])
    pyro.set_rng_seed(args["seed"])  # reproducible random effect parameter init
    if args["cuda"]:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    if args["dataset"] == "seal":
        filename = os.path.join(args["folder"], "prep_seal_data.csv")
        config = prepare_seal(filename, random_effects)
    elif args["dataset"] == "fake":
        fake_sizes = {
            "state": args["size_state"],
            "random": args["size_random"],
            "group": args["size_group"],
            "individual": args["size_individual"],
            "timesteps": args["size_timesteps"],
        }
        config = prepare_fake(fake_sizes, random_effects)
    else:
        raise ValueError("Dataset {} not yet included".format(args["dataset"]))

    if args["smoke"]:
        args["timesteps"] = 2
        config["sizes"]["timesteps"] = 3

    if args["truncate"] > 0:
        config["sizes"]["timesteps"] = args["truncate"]

    config["zeroinflation"] = args["zeroinflation"]

    model = Model(config)
    guide = Guide(config)
    loss_fn = parallel_loss_fn

    if args["jit"]:
        loss_fn = torch.jit.trace(
            lambda: loss_fn(model, guide, args["parallel"]), ())
    else:
        loss_fn = functools.partial(loss_fn, model, guide, args["parallel"])

    # count the number of parameters once
    num_parameters = aic_num_parameters(model, guide)

    losses = []

    # TODO support continuous random effects with monte carlo
    assert random_effects["group"] != "continuous", \
        "continuous group effects are not supported yet"
    assert random_effects["individual"] != "continuous", \
        "continuous individual effects are not supported yet"

    # run the loss once to initialize all parameters, capturing their sites
    # so the unconstrained tensors can be handed to a torch optimizer
    with pyro.poutine.trace(param_only=True) as param_capture:
        loss_fn()
    params = [
        site["value"].unconstrained()
        for site in param_capture.trace.nodes.values()
    ]
    if optim == "sgd":
        optimizer = torch.optim.Adam(params, lr=lr)
    elif optim == "lbfgs":
        optimizer = torch.optim.LBFGS(params, lr=lr)
    else:
        raise ValueError("{} not supported optimizer".format(optim))

    if schedule:
        # halve the learning rate at each user-supplied milestone
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         milestones=schedule,
                                                         gamma=0.5)
        schedule_step_loss = False
    else:
        # no milestones given: decay the learning rate when the loss plateaus
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min')
        schedule_step_loss = True

    for t in range(args["timesteps"]):

        def closure():
            # LBFGS may re-evaluate the loss several times per step, hence
            # the closure; Adam calls it exactly once
            optimizer.zero_grad()
            loss = loss_fn()
            loss.backward()
            return loss

        loss = optimizer.step(closure)
        scheduler.step(loss.item() if schedule_step_loss else t)
        losses.append(loss.item())
        print("Loss: {}, AIC[{}]: ".format(loss.item(), t),
              2. * loss + 2. * num_parameters)

    aic_final = 2. * losses[-1] + 2. * num_parameters
    print("AIC final: {}".format(aic_final))

    results = {}
    results["args"] = args
    results["sizes"] = config["sizes"]
    results["likelihoods"] = losses
    results["likelihood_final"] = losses[-1]
    results["aic_final"] = aic_final
    results["aic_num_parameters"] = num_parameters

    if args["resultsdir"] is not None and os.path.exists(args["resultsdir"]):
        re_str = "g" + ("n" if args["group"] is None else
                        "d" if args["group"] == "discrete" else "c")
        re_str += "i" + ("n" if args["individual"] is None else
                         "d" if args["individual"] == "discrete" else "c")
        results_filename = "expt_{}_{}_{}.json".format(
            args["dataset"], re_str, uuid.uuid4().hex[:5])
        with open(os.path.join(args["resultsdir"], results_filename),
                  "w") as f:
            json.dump(results, f)

    return results
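
A minimal invocation sketch for the function above; the keys mirror those read inside run_expt, but the concrete values are illustrative, not taken from the source:

args = {
    "optim": "sgd", "learnrate": 0.05, "schedule": "",
    "group": None, "individual": None,  # None or "discrete"
    "validation": True, "seed": 0, "cuda": False,
    "dataset": "fake", "folder": ".",
    "size_state": 3, "size_random": 4, "size_group": 2,
    "size_individual": 10, "size_timesteps": 5,
    "smoke": True,  # quick sanity-check run
    "truncate": 0, "zeroinflation": False,
    "timesteps": 2, "jit": False, "parallel": True,
    "resultsdir": None,  # skip writing the JSON report
}
results = run_expt(args)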
Example #2
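This variant is specialized to the seal dataset and builds the model and guide via functools.partial; unlike Example #1, it duplicates the parameter capture and scheduler setup across the "sgd" and "lbfgs" branches.
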
def run_expt(args):

    data_dir = args["folder"]
    dataset = "seal"  # args["dataset"]
    seed = args["seed"]
    optim = args["optim"]
    lr = args["learnrate"]
    timesteps = args["timesteps"]
    schedule = [] if not args["schedule"] else [
        int(i) for i in args["schedule"].split(",")
    ]
    random_effects = {"group": args["group"], "individual": args["individual"]}

    pyro.enable_validation(args["validation"])
    pyro.set_rng_seed(seed)  # reproducible random effect parameter init

    filename = os.path.join(data_dir, "prep_seal_data.csv")
    config = prepare_seal(filename, random_effects)

    model = functools.partial(model_generic, config)  # for JITing
    guide = functools.partial(guide_generic, config)

    # count the number of parameters once
    num_parameters = aic_num_parameters(model, guide)

    losses = []
    # SGD branch (note: the "sgd" option actually selects Adam)
    if optim == "sgd":
        loss_fn = TraceEnum_ELBO(max_plate_nesting=2).differentiable_loss
        # run the loss once to initialize parameters and capture their sites
        with pyro.poutine.trace(param_only=True) as param_capture:
            loss_fn(model, guide)
        params = [
            site["value"].unconstrained()
            for site in param_capture.trace.nodes.values()
        ]
        optimizer = torch.optim.Adam(params, lr=lr)

        if schedule:
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=schedule, gamma=0.5)
            schedule_step_loss = False
        else:
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, 'min')
            schedule_step_loss = True

        for t in range(timesteps):

            optimizer.zero_grad()
            loss = loss_fn(model, guide)
            loss.backward()
            optimizer.step()
            scheduler.step(loss.item() if schedule_step_loss else t)
            losses.append(loss.item())

            print("Loss: {}, AIC[{}]: ".format(loss.item(), t),
                  2. * loss + 2. * num_parameters)

    # LBFGS
    elif optim == "lbfgs":
        loss_fn = TraceEnum_ELBO(max_plate_nesting=2).differentiable_loss
        # run the loss once to initialize parameters and capture their sites
        with pyro.poutine.trace(param_only=True) as param_capture:
            loss_fn(model, guide)
        params = [
            site["value"].unconstrained()
            for site in param_capture.trace.nodes.values()
        ]
        optimizer = torch.optim.LBFGS(params, lr=lr)

        if schedule:
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=schedule, gamma=0.5)
            schedule_step_loss = False
        else:
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, 'min')
            schedule_step_loss = True

        for t in range(timesteps):

            def closure():  # LBFGS may re-evaluate the loss within one step
                optimizer.zero_grad()
                loss = loss_fn(model, guide)
                loss.backward()
                return loss

            loss = optimizer.step(closure)
            scheduler.step(loss.item() if schedule_step_loss else t)
            losses.append(loss.item())
            print("Loss: {}, AIC[{}]: ".format(loss.item(), t),
                  2. * loss + 2. * num_parameters)

    else:
        raise ValueError("Unsupported optimizer: {}".format(optim))

    aic_final = 2. * losses[-1] + 2. * num_parameters
    print("AIC final: {}".format(aic_final))

    results = {}
    results["args"] = args
    results["sizes"] = config["sizes"]
    results["likelihoods"] = losses
    results["likelihood_final"] = losses[-1]
    results["aic_final"] = aic_final
    results["aic_num_parameters"] = num_parameters

    if args["resultsdir"] is not None and os.path.exists(args["resultsdir"]):
        re_str = "g" + ("n" if args["group"] is None else
                        "d" if args["group"] == "discrete" else "c")
        re_str += "i" + ("n" if args["individual"] is None else
                         "d" if args["individual"] == "discrete" else "c")
        results_filename = "expt_{}_{}_{}.json".format(
            dataset, re_str, uuid.uuid4().hex[:5])
        with open(os.path.join(args["resultsdir"], results_filename),
                  "w") as f:
            json.dump(results, f)

    return results
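
Both examples report Akaike's information criterion at every step. Because the differentiable ELBO loss plays the role of a negative log-likelihood here, the printed quantity is 2 * loss + 2 * num_parameters; a minimal restatement of that formula, assuming the names from the loops above:

def aic(neg_log_likelihood, num_parameters):
    # AIC = 2k - 2 ln(L); neg_log_likelihood already carries the minus sign,
    # so this matches the 2. * loss + 2. * num_parameters printed each step
    return 2.0 * neg_log_likelihood + 2.0 * num_parameters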