예제 #1
0
def make_policy_network(args):
    """Return the policy network for this run."""
    kwargs = args.policy_kwargs
    env = args.environment
    if env == 'LunarLander-v2':
        # 4 discrete actions; observations are 8-dimensional vectors.
        assert "output_dim" not in kwargs
        policy_network = MultilayerPerceptron(4, **kwargs)
        batch = np.zeros((1, 8), dtype=np.float32)
    elif env in ['CartPole-v0', 'CartPole-v1']:
        # 2 discrete actions; observations are 4-dimensional vectors.
        assert "output_dim" not in kwargs
        policy_network = MultilayerPerceptron(2, **kwargs)
        batch = np.zeros((1, 4), dtype=np.float32)
    else:
        # Ideal environments: pick the model class by name, defaulting to
        # the pointer network for any unrecognized --policy-model value.
        model_classes = {
            'pmlp': ParallelMultilayerPerceptron,
            'apmlp': AttentionPMLP,
            'tpmlp': TransformerPMLP,
        }
        model_class = model_classes.get(args.policy_model, PointerNetwork)
        policy_network = model_class(**kwargs)
        pair_width = 2 * args.k * int(args.distribution.split('-')[0])
        batch = np.zeros((1, 10, pair_width), dtype=np.int32)
    policy_network(batch)  # build network
    if args.policy_weights != "":
        policy_network.load_weights(args.policy_weights)
    return policy_network
예제 #2
0
def make_value_network(args):
    """Return the value network for this run.

    Returns None when --value-model is 'none', the sentinel string 'env'
    when the environment itself provides values, or a built network
    otherwise.
    """
    kwargs = args.value_kwargs
    if args.value_model == 'none':
        value_network = None
    elif args.environment == 'LunarLander-v2':
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1,
                                             final_activation='linear',
                                             **kwargs)
        batch = np.zeros((1, 8), dtype=np.float32)
        value_network(batch)  # build network
    elif args.environment in ['CartPole-v0', 'CartPole-v1']:
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1,
                                             final_activation='linear',
                                             **kwargs)
        batch = np.zeros((1, 4), dtype=np.float32)
        value_network(batch)  # build network
    else:
        if args.value_model == 'pairsleft':
            value_network = PairsLeftBaseline(gam=args.gam)
        else:
            value_network = 'env'
    # Fix: only objects that actually expose load_weights may load them.
    # Previously None (value_model 'none') and the 'env' sentinel string
    # crashed here with AttributeError when --value-weights was given.
    if args.value_weights != "" and hasattr(value_network, 'load_weights'):
        value_network.load_weights(args.value_weights)
    return value_network
예제 #3
0
def make_value_network(args):
    """Return the value network for this run.

    Returns None when --value-model is 'none', the sentinel string 'env'
    when the environment itself provides values, or a built network
    otherwise.
    """
    kwargs = args.value_kwargs
    if args.value_model == 'none':
        value_network = None
    elif args.environment == 'LunarLander-v2':
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1,
                                             final_activation='linear',
                                             **kwargs)
        batch = np.zeros((1, 8), dtype=np.float32)
        value_network(batch)  # build network
    elif args.environment in ['CartPole-v0', 'CartPole-v1']:
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1,
                                             final_activation='linear',
                                             **kwargs)
        batch = np.zeros((1, 4), dtype=np.float32)
        value_network(batch)  # build network
    elif args.value_model == 'pairsleft':
        value_network = PairsLeftBaseline(gam=args.gam)
    elif args.value_model == 'rnn':
        # NOTE(review): unlike 'tvm', the rnn/pool models are not built with
        # a dummy batch before weight loading — presumably they support lazy
        # building; confirm if load_weights ever fails for them.
        value_network = RecurrentValueModel(**kwargs)
    elif args.value_model == 'pool':
        value_network = PoolingValueModel(**kwargs)
    elif args.value_model == 'tvm':
        value_network = TransformerValueModel(**kwargs)
        batch = np.zeros(
            (1, 10, 2 * args.k * int(args.distribution.split('-')[0])),
            dtype=np.int32)
        value_network(batch)
    else:
        value_network = 'env'
    # Fix: only objects that actually expose load_weights may load them.
    # Previously None (value_model 'none') and the 'env' sentinel string
    # crashed here with AttributeError when --value-weights was given.
    if args.value_weights != "" and hasattr(value_network, 'load_weights'):
        value_network.load_weights(args.value_weights)
    return value_network
예제 #4
0
def make_value_network(args):
    """Return the value network for this run, or None if no value model.

    Fix: previously an unrecognized --value-model in the ideal-environment
    branch left value_network unassigned, raising UnboundLocalError at the
    return; it now falls back to None. The two duplicated 'rnn' branches
    are also merged.
    """
    # (input_dim, output_dim) per environment; only input_dim is used here.
    dims = {
        'CartPole-v0': (4, 2),
        'CartPole-v1': (4, 2),
        'LunarLander-v2': (8, 4),
        'RandomBinomialIdeal': (2 * args.variables * args.k, 1),
        'MixedRandomBinomialIdeal': (2 * args.variables * args.k, 1),
        'RandomPolynomialIdeal': (2 * args.variables * args.k, 1)
    }[args.environment]

    value_network = None  # default: 'none' or an unrecognized model name
    if args.environment in ['CartPole-v0', 'CartPole-v1', 'LunarLander-v2']:
        if args.value_model != 'none':
            value_network = MultilayerPerceptron(dims[0],
                                                 args.value_hl,
                                                 1,
                                                 final_activation='linear')
    elif args.value_model == 'pairsleft':
        value_network = PairsLeftBaseline(gam=args.gam)
    elif args.value_model == 'degree':
        value_network = AgentBaseline(BuchbergerAgent('degree'),
                                      gam=args.gam)
    elif args.value_model == 'agent':
        # Baseline driven by a pretrained policy agent; --value-weights
        # names the policy weights to load into that agent.
        agent = PPOAgent(
            ParallelMultilayerPerceptron(dims[0], args.policy_hl))
        agent.load_policy_weights(args.value_weights)
        value_network = AgentBaseline(agent, gam=args.gam)
    elif args.value_model == 'rnn':
        value_network = ValueRNN(dims[0], args.value_hl[0])
        if args.value_weights != "":
            value_network.load_weights(args.value_weights)

    return value_network
예제 #5
0
def make_policy_network(args):
    """Return the policy network for this run."""
    # (input_dim, output_dim) per environment.
    input_dim, output_dim = {
        'CartPole-v0': (4, 2),
        'CartPole-v1': (4, 2),
        'LunarLander-v2': (8, 4),
        'RandomBinomialIdeal': (2 * args.variables * args.k, 1),
        'MixedRandomBinomialIdeal': (2 * args.variables * args.k, 1),
        'RandomPolynomialIdeal': (2 * args.variables * args.k, 1)
    }[args.environment]

    ideal_envs = ('RandomBinomialIdeal', 'MixedRandomBinomialIdeal',
                  'RandomPolynomialIdeal')
    if args.environment in ideal_envs:
        # Ideal environments score a variable number of pairs in parallel.
        policy_network = ParallelMultilayerPerceptron(input_dim,
                                                      args.policy_hl)
    else:
        policy_network = MultilayerPerceptron(input_dim,
                                              args.policy_hl,
                                              output_dim)

    if args.policy_weights != "":
        policy_network.load_weights(args.policy_weights)

    return policy_network