Example #1
def get_updater(env):
    policy = Policy(GoToPoint(), Deterministic(2), env.obs_shape)
    # controller = FeedforwardCell(lambda inp, output_size: MLP([128, 128])(inp, output_size), 1)
    controller = FeedforwardCell(
        lambda inp, output_size: fully_connected(
            inp, output_size, activation_fn=None), 1)
    estimator = NeuralValueEstimator(controller, env.obs_shape)
    alg = cfg.alg_class(estimator, name="critic")
    updater = RLUpdater(env, policy, alg)
    return updater
Example #2
File: collect.py Project: alcinos/dps
def build_object_network_controller(output_size, name):
    from dps.utils.tf import ObjectNetwork
    ff = ObjectNetwork(n_repeats=1, scope="collection_controller")
    return FeedforwardCell(ff, output_size, name=name)
Example #3
File: collect.py Project: alcinos/dps
def build_attentional_relation_network(output_size, name):
    from dps.utils.tf import AttentionalRelationNetwork
    ff = AttentionalRelationNetwork(n_repeats=2, scope="collection_controller")
    return FeedforwardCell(ff, output_size, name=name)
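Both builder functions above follow the same (output_size, name) factory signature. An illustrative call, with argument values that are assumptions rather than anything taken from collect.py:

# Illustrative only: output_size and name are assumed values.
cell = build_object_network_controller(output_size=32, name="collector_cell")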
Example #4
    "Exponential(start=10000.0, end=0.000000001, decay_rate=0.1, decay_steps=3000, log=True)",
    z_pres_temperature=1.0,
    run_all_time_steps=False,
    stopping_threshold=0.99,
    per_process_gpu_memory_fraction=0.3,
    training_wheels=0.0,
    scale_prior_mean=-1.0,
    scale_prior_std=np.sqrt(0.05),
    shift_prior_mean=0.0,
    shift_prior_std=3.0,
    complete_rnn_input=False,
)

dair_config = air_config.copy(
    difference_air=True,
    build_cell=lambda scope: FeedforwardCell(MLP(n_units=[256, 256, 256, 256]),
                                             cfg.rnn_n_units),
)

nem_config = alg_config.copy(
    alg_name="nem",
    build_network=nem.NEM_Network,
    batch_size=16,
    lr_schedule=0.001,
    max_grad_norm=None,
    threshold=-np.inf,
    max_experiments=None,
    render_hook=nem.NeuralEM_RenderHook(4),
    render_step=5000,

    # ------- from nem.py --------
    noise_prob=0.2,  # probability of annihilating the pixel
Example #5
def __call__(self, param_shape, name=None):
    return FeedforwardCell(MLP(), param_shape, name=name)
Example #6
def __call__(self, param_shape, name=None):
    return FeedforwardCell(MLP(*self.args, **self.kwargs),
                           param_shape,
                           name=name)
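The __call__ methods in Examples #5 and #6 are extracted from builder classes whose surrounding definitions are not shown. A minimal self-contained sketch of the Example #6 pattern, assuming a hypothetical class name and an __init__ that simply stores the MLP arguments (neither is taken from the dps source):

from dps.utils.tf import MLP, FeedforwardCell

class MLPCellBuilder:  # hypothetical name, not from dps
    def __init__(self, *args, **kwargs):
        # Arguments are forwarded to MLP, e.g. n_units=[128, 128].
        self.args = args
        self.kwargs = kwargs

    def __call__(self, param_shape, name=None):
        # Wrap a freshly built MLP in a FeedforwardCell, as in Example #6.
        return FeedforwardCell(MLP(*self.args, **self.kwargs),
                               param_shape, name=name)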
Example #7
File: mountain_car.py Project: alcinos/dps
from dps import cfg
from dps.config import DEFAULT_CONFIG
from dps.env import BatchGymEnv
from dps.utils.tf import MLP, FeedforwardCell
from dps.rl import rl_render_hook, BuildSoftmaxPolicy, BuildMlpController


def build_env():
    return BatchGymEnv(gym_env='MountainCar-v0')


controller = lambda params_dim, name: FeedforwardCell(
    lambda inp, output_size: MLP(
        [cfg.n_controller_units, cfg.n_controller_units])(inp, output_size),
    params_dim, name=name)
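# Illustrative use of the factory above (argument values are assumptions,
# not from mountain_car.py); it relies on cfg.n_controller_units being set
# by the active config:
#     cell = controller(3, name="mountain_car_cell")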


config = DEFAULT_CONFIG.copy()


# So far, we have not been able to solve this with a policy gradient method; the exploration problem is quite hard.


config.update(
    env_name="mountain_car",

    build_env=build_env,

    build_controller=BuildMlpController(),
    build_policy=BuildSoftmaxPolicy(one_hot=False),
    exploration_schedule="1.0",