Example #1
    # Snippet taken from inside a main() function. Standard imports assumed:
    # argparse, os, torch, torch.optim as optim, torch.optim.lr_scheduler as
    # scheduler, and SummaryWriter (tensorboardX / torch.utils.tensorboard).
    # cubes, conf and model are project-local modules; log is a module logger.

    # Command-line arguments: the ini file driving the run and a run name.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--ini", required=True,
                        help="Ini file to use for this run")
    parser.add_argument("-n", "--name", required=True,
                        help="Name of the run")
    args = parser.parse_args()
    config = conf.Config(args.ini)
    device = torch.device("cuda" if config.train_cuda else "cpu")

    # TensorBoard writer and checkpoint directory, both keyed by the run name.
    name = config.train_name(suffix=args.name)
    writer = SummaryWriter(comment="-" + name)
    save_path = os.path.join("saves", name)
    os.makedirs(save_path)

    # Select the cube environment and the method used to build value targets.
    cube_env = cubes.get(config.cube_type)
    assert isinstance(cube_env, cubes.CubeEnv)
    log.info("Selected cube: %s", cube_env)
    value_targets_method = model.ValueTargetsMethod(
        config.train_value_targets_method)

    # Network, optimizer and optional step-wise learning-rate decay.
    net = model.Net(cube_env.encoded_shape,
                    len(cube_env.action_enum)).to(device)
    print(net)
    opt = optim.Adam(net.parameters(), lr=config.train_learning_rate)
    sched = (scheduler.StepLR(opt, 1, gamma=config.train_lr_decay_gamma)
             if config.train_lr_decay_enabled else None)

    # Counters and buffers used to average losses over the training loop.
    step_idx = 0
    buf_policy_loss, buf_value_loss, buf_loss = [], [], []
    buf_policy_loss_raw, buf_value_loss_raw, buf_loss_raw = [], [], []
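
The snippet stops just before the training loop. Below is a minimal sketch of how one iteration might use the objects created above (net, opt, sched, writer, device, step_idx and the loss buffers); get_batch() is a hypothetical placeholder for the project's data generation, and the network is assumed to return a (policy_logits, value) pair, so the actual loss computation may differ.

    import numpy as np                    # extra imports used only by this sketch
    import torch.nn.functional as F

    # Hypothetical single training step; everything except get_batch() and the
    # two extra imports comes from the snippet above.
    x, policy_targets, value_targets = get_batch()       # hypothetical helper
    policy_out, value_out = net(x.to(device))            # assumes two output heads
    policy_loss = F.cross_entropy(policy_out, policy_targets.to(device))
    value_loss = F.mse_loss(value_out.squeeze(-1), value_targets.to(device))
    loss = policy_loss + value_loss

    opt.zero_grad()
    loss.backward()
    opt.step()

    # Track losses and periodically report the running average to TensorBoard.
    step_idx += 1
    buf_policy_loss.append(policy_loss.item())
    buf_value_loss.append(value_loss.item())
    buf_loss.append(loss.item())
    if step_idx % 100 == 0:
        writer.add_scalar("loss", np.mean(buf_loss), step_idx)
        buf_policy_loss.clear()
        buf_value_loss.clear()
        buf_loss.clear()
        if sched is not None:
            sched.step()                                  # apply StepLR decay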
Example #2
    # Snippet from inside a main() function; parser = argparse.ArgumentParser()
    # is created earlier in it. cubes, model, gen_states, MAX_DEPTH and
    # ROUND_COUNTS are project-local names; log is a module logger.

    # Command-line arguments: environment type, model checkpoint and plot prefix.
    parser.add_argument("-e", "--env", required=True,
                        help="Type of env to train, supported types=%s" %
                        cubes.names())
    parser.add_argument("-m", "--model", required=True,
                        help="Model file to load")
    parser.add_argument("-o", "--output", required=True,
                        help="Output prefix for plots")
    args = parser.parse_args()

    # Build the network for the chosen cube and load the checkpoint onto the CPU
    # (map_location remaps CUDA tensors so a GPU-trained model loads anywhere).
    cube_env = cubes.get(args.env)
    log.info("Selected cube: %s", cube_env)
    net = model.Net(cube_env.encoded_shape, len(cube_env.action_enum))
    net.load_state_dict(
        torch.load(args.model, map_location=lambda storage, loc: storage))
    net.eval()
    log.info("Network loaded from %s", args.model)

    #    model.make_train_data(cube_env, net, device='cpu', batch_size=10, scramble_depth=2, shuffle=False)

    # Generate scrambled states grouped by scramble depth.
    states_by_depth = gen_states(cube_env,
                                 max_depth=MAX_DEPTH,
                                 round_counts=ROUND_COUNTS)
    # for idx, states in enumerate(states_by_depth):
    #     log.info("%d: %s", idx, states)
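
A typical next step after loading the network is to score the generated states without tracking gradients. The sketch below assumes gen_states returns a list of state lists indexed by scramble depth and that the network returns a (policy_logits, value) pair; encode_states() is a hypothetical stand-in for the project's own state-to-tensor encoding.

    # Hypothetical evaluation pass over the generated states; only net,
    # states_by_depth and log come from the snippet above.
    value_by_depth = []
    with torch.no_grad():
        for depth, states in enumerate(states_by_depth):
            x = torch.tensor(encode_states(states), dtype=torch.float32)
            policy_out, value_out = net(x)            # assumes two output heads
            value_by_depth.append(value_out.mean().item())

    for depth, mean_value in enumerate(value_by_depth):
        log.info("depth=%d mean value=%.3f", depth, mean_value)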