def make_policy_network(args):
    """Return the policy network for this run."""
    kwargs = args.policy_kwargs
    if args.environment == 'LunarLander-v2':
        assert "output_dim" not in kwargs
        policy_network = MultilayerPerceptron(4, **kwargs)
        batch = np.zeros((1, 8), dtype=np.float32)
    elif args.environment in ['CartPole-v0', 'CartPole-v1']:
        assert "output_dim" not in kwargs
        policy_network = MultilayerPerceptron(2, **kwargs)
        batch = np.zeros((1, 4), dtype=np.float32)
    else:
        if args.policy_model == 'pmlp':
            policy_network = ParallelMultilayerPerceptron(**kwargs)
        elif args.policy_model == 'apmlp':
            policy_network = AttentionPMLP(**kwargs)
        elif args.policy_model == 'tpmlp':
            policy_network = TransformerPMLP(**kwargs)
        else:
            policy_network = PointerNetwork(**kwargs)
        # first field of the distribution string is the number of variables
        batch = np.zeros((1, 10, 2 * args.k * int(args.distribution.split('-')[0])),
                         dtype=np.int32)
    policy_network(batch)  # build network so weights can be loaded
    if args.policy_weights != "":
        policy_network.load_weights(args.policy_weights)
    return policy_network
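# Illustrative usage (not part of the original script): build a CartPole
# policy from a hand-made namespace. Field names mirror those read by
# make_policy_network above; the values, and the 'hidden_layers' kwarg in
# particular, are hypothetical examples.
def _example_policy_network():
    from argparse import Namespace
    args = Namespace(
        environment='CartPole-v1',
        policy_model='pmlp',                      # ignored on the gym branches
        policy_kwargs={'hidden_layers': [128]},   # forwarded to the constructor
        policy_weights='',                        # empty string means no checkpoint
        k=2,                                      # unused on the gym branches
        distribution='3-20-10-uniform',           # unused on the gym branches
    )
    return make_policy_network(args)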
def make_value_network(args):
    """Return the value network for this run."""
    kwargs = args.value_kwargs
    if args.value_model == 'none':
        value_network = None
    elif args.environment == 'LunarLander-v2':
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1, final_activation='linear', **kwargs)
        batch = np.zeros((1, 8), dtype=np.float32)
        value_network(batch)  # build network
    elif args.environment in ['CartPole-v0', 'CartPole-v1']:
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1, final_activation='linear', **kwargs)
        batch = np.zeros((1, 4), dtype=np.float32)
        value_network(batch)  # build network
    else:
        if args.value_model == 'pairsleft':
            value_network = PairsLeftBaseline(gam=args.gam)
        else:
            value_network = 'env'
    # None and the 'env' sentinel have no weights to load.
    if args.value_weights != "" and hasattr(value_network, 'load_weights'):
        value_network.load_weights(args.value_weights)
    return value_network
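# Illustrative usage for the factory above: an MLP value network for
# LunarLander. 'mlp' is a stand-in model name (anything other than 'none'
# reaches the gym branches), and the value_kwargs keys are hypothetical.
def _example_lunar_value_network():
    from argparse import Namespace
    args = Namespace(
        environment='LunarLander-v2',
        value_model='mlp',
        value_kwargs={'hidden_layers': [128]},  # hypothetical constructor kwargs
        value_weights='',
        gam=0.99,  # only read on the 'pairsleft' path
    )
    return make_value_network(args)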
def make_value_network(args):
    """Return the value network for this run."""
    dims = {
        'CartPole-v0': (4, 2),
        'CartPole-v1': (4, 2),
        'LunarLander-v2': (8, 4),
        'RandomBinomialIdeal': (2 * args.variables * args.k, 1),
        'MixedRandomBinomialIdeal': (2 * args.variables * args.k, 1),
        'RandomPolynomialIdeal': (2 * args.variables * args.k, 1),
    }[args.environment]
    if args.environment in ['CartPole-v0', 'CartPole-v1', 'LunarLander-v2']:
        if args.value_model == 'none':
            value_network = None
        else:
            value_network = MultilayerPerceptron(dims[0], args.value_hl, 1,
                                                 final_activation='linear')
    else:
        if args.value_model == 'none':
            value_network = None
        elif args.value_model == 'pairsleft':
            value_network = PairsLeftBaseline(gam=args.gam)
        elif args.value_model == 'degree':
            value_network = AgentBaseline(BuchbergerAgent('degree'), gam=args.gam)
        elif args.value_model == 'agent':
            agent = PPOAgent(ParallelMultilayerPerceptron(dims[0], args.policy_hl))
            agent.load_policy_weights(args.value_weights)
            value_network = AgentBaseline(agent, gam=args.gam)
        elif args.value_model == 'rnn':
            value_network = ValueRNN(dims[0], args.value_hl[0])
            if args.value_weights != "":
                value_network.load_weights(args.value_weights)
        else:
            raise ValueError(f"unknown value model '{args.value_model}'")
    return value_network
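# Illustrative usage for this variant of the factory: the 'agent' baseline
# estimates values by rolling out a frozen pretrained policy. The checkpoint
# path and layer sizes below are placeholders, and the example only applies
# when this definition of make_value_network is the active one.
def _example_agent_baseline():
    from argparse import Namespace
    args = Namespace(
        environment='RandomBinomialIdeal',
        value_model='agent',
        value_weights='policy_weights.h5',  # hypothetical checkpoint path
        variables=3,
        k=2,
        gam=0.99,
        value_hl=[128],
        policy_hl=[128],
    )
    return make_value_network(args)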
def make_value_network(args):
    """Return the value network for this run."""
    kwargs = args.value_kwargs
    if args.value_model == 'none':
        value_network = None
    elif args.environment == 'LunarLander-v2':
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1, final_activation='linear', **kwargs)
        batch = np.zeros((1, 8), dtype=np.float32)
        value_network(batch)  # build network
    elif args.environment in ['CartPole-v0', 'CartPole-v1']:
        assert "output_dim" not in kwargs and "final_activation" not in kwargs
        value_network = MultilayerPerceptron(1, final_activation='linear', **kwargs)
        batch = np.zeros((1, 4), dtype=np.float32)
        value_network(batch)  # build network
    elif args.value_model == 'pairsleft':
        value_network = PairsLeftBaseline(gam=args.gam)
    elif args.value_model == 'rnn':
        value_network = RecurrentValueModel(**kwargs)
    elif args.value_model == 'pool':
        value_network = PoolingValueModel(**kwargs)
    elif args.value_model == 'tvm':
        value_network = TransformerValueModel(**kwargs)
        batch = np.zeros((1, 10, 2 * args.k * int(args.distribution.split('-')[0])),
                         dtype=np.int32)
        value_network(batch)  # build network
    else:
        value_network = 'env'
    # None and the 'env' sentinel have no weights to load.
    if args.value_weights != "" and hasattr(value_network, 'load_weights'):
        value_network.load_weights(args.value_weights)
    return value_network
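# Illustrative usage for the factory above: a 'pairsleft' baseline for a
# Buchberger-style environment. The environment name just needs to miss the
# gym branches; the remaining fields mirror those the factory reads.
def _example_pairsleft_value_network():
    from argparse import Namespace
    args = Namespace(
        environment='RandomBinomialIdeal',
        value_model='pairsleft',
        value_kwargs={},
        value_weights='',
        gam=0.99,
        k=2,
        distribution='3-20-10-uniform',  # only read on the 'tvm' path
    )
    return make_value_network(args)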