Example #1
 def __init__(self,
              id,
              device,
              policy_network,
              gamma=0.999,
              batch_size_experience_replay=128,
              target_update=10,
              optimizer=None,
              loss_function=None,
              loss_function_gate=None,
              ratio_learn_test=None,
              workspace=None,
              lr=None,
              weight_decay=None,
              transfer_module=None,
              writer=None,
              info=None,
              transfer_param_init=None,
              feature=None,
              **kwargs):
     self.id = id
     self.info = info if info is not None else {}
     self.tm = transfer_module
     self.feature = feature
     if transfer_param_init is None:
         self.transfer_param_init = {
             "w": np.random.random_sample(1)[0],
             "b": np.random.random_sample(1)[0]
         }
     else:
         self.transfer_param_init = transfer_param_init
     self.ratio_learn_test = ratio_learn_test
     self.loss_function_gate = loss_fonction_factory(loss_function_gate)
     self.best_fit_over_time = []
     self.ae_errors_over_time = []
     self.error_bootstrap_source = []
     self.error_bootstrap_partial = []
     self.p_over_time = []
     self.writer = writer
     self.device = device
     self.size_mini_batch = batch_size_experience_replay
     self.gamma = gamma
     self.target_update = target_update
     self.workspace = workspace
     self.full_net = policy_network.to(self.device)
     self.n_actions = self.full_net.predict.out_features
     self.loss_function = loss_fonction_factory(loss_function)
     self.lr = lr
     self.weight_decay = weight_decay
     self.optimizer_type = optimizer
     self.reset()
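
Every snippet on this page passes a configuration string through loss_fonction_factory to obtain a loss object, but the factory itself is not part of the listing. The sketch below only illustrates the pattern, assuming the factory resolves a name to a torch.nn loss instance; the helper name loss_function_factory_sketch and the accepted keys are made up for the illustration, not the project's actual API.

import torch.nn as nn

def loss_function_factory_sketch(name):
    # Illustrative stand-in for loss_fonction_factory: resolve a config
    # string to a torch.nn loss module (the key names are assumptions).
    losses = {
        "l2": nn.MSELoss(),
        "l1": nn.L1Loss(),
        "l1_smooth": nn.SmoothL1Loss(),
        "bce": nn.BCELoss(),
    }
    if name not in losses:
        raise ValueError("unknown loss function: {}".format(name))
    return losses[name]

# usage: criterion = loss_function_factory_sketch("l2"); loss = criterion(prediction, target)
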
Example #2
def main(loss_function_str,
         optimizer_str,
         weight_decay,
         learning_rate,
         normalize,
         autoencoder_size,
         n_epochs,
         feature_autoencoder_info,
         workspace,
         device,
         type_ae="AEA",
         N_actions=None,
         writer=None):
    import torch
    loss_function = loss_fonction_factory(loss_function_str)
    makedirs(workspace)
    feature = build_feature_autoencoder(feature_autoencoder_info)
    min_n, max_n = autoencoder_size

    all_transitions = utils.read_samples_for_ae(workspace / "samples", feature,
                                                N_actions)

    autoencoders = [
        AutoEncoder(n_in=transitions.X.shape[1],
                    n_out=transitions.X.shape[1] *
                    (N_actions if type_ae == "AEA" else 1),
                    min_n=min_n,
                    max_n=max_n,
                    device=device) for transitions in all_transitions
    ]

    path_auto_encoders = workspace / "ae"
    makedirs(path_auto_encoders)
    print("learning_rate", learning_rate)
    print("optimizer_str", optimizer_str)
    print("weight_decay", weight_decay)
    # fit one auto-encoder per source environment's transitions
    for ienv, transitions in enumerate(all_transitions):
        autoencoders[ienv].reset()
        optimizer = optimizer_factory(optimizer_str,
                                      autoencoders[ienv].parameters(),
                                      lr=learning_rate,
                                      weight_decay=weight_decay)
        autoencoders[ienv].fit(transitions,
                               size_minibatch=all_transitions[ienv].X.shape[0],
                               n_epochs=n_epochs,
                               optimizer=optimizer,
                               normalize=normalize,
                               stop_loss=0.01,
                               loss_function=loss_function,
                               writer=writer)

        path_autoencoder = path_auto_encoders / "{}.pt".format(ienv)
        logger.info("saving autoencoder at {}".format(path_autoencoder))
        torch.save(autoencoders[ienv], path_autoencoder)
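
Example #2 also depends on optimizer_factory(optimizer_str, parameters, lr, weight_decay), which is not shown either. Assuming it simply dispatches on the optimizer name to a torch.optim class, a minimal sketch could look like the following; the accepted names and the helper name optimizer_factory_sketch are guesses for illustration only.

import torch.optim as optim

def optimizer_factory_sketch(name, parameters, lr, weight_decay):
    # Illustrative stand-in for optimizer_factory: map a name string to a
    # torch.optim optimizer over the given parameters.
    if name == "ADAM":
        return optim.Adam(parameters, lr=lr, weight_decay=weight_decay)
    if name == "RMS_PROP":
        return optim.RMSprop(parameters, lr=lr, weight_decay=weight_decay)
    if name == "SGD":
        return optim.SGD(parameters, lr=lr, weight_decay=weight_decay)
    raise ValueError("unknown optimizer: {}".format(name))
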
Example #3
def main(loss_autoencoders_str, feature_autoencoder_info, target_envs, N, path_models, path_samples, seed,
         source_params, device, N_actions):
    loss_autoencoders = loss_fonction_factory(loss_autoencoders_str)
    autoencoders = utils.load_autoencoders(path_models, device)
    feature_autoencoder = build_feature_autoencoder(feature_autoencoder_info)
    tm = TransferModule(
        autoencoders=autoencoders,
        loss_autoencoders=loss_autoencoders,
        feature_autoencoders=feature_autoencoder,
        device=device,
        N_actions=N_actions,
        sources_params=source_params)
    errors_base = []
    memories = load_memories(path_samples, as_json=False)
    for memory in memories:
        tm.reset()
        tm.push_memory(memory.memory)
        tm.update()
        errors_base.append(tm.errors)

    print("================================================ base ================================================")
    print(errors_base)
    print(utils.array_to_cross_comparaison(errors_base, source_params, source_params))

    test_envs, test_params = generate_envs(**target_envs)

    errors_test = []
    for ienv, test_env in enumerate(test_envs):
        tm.reset()
        if seed is not None:
            test_env.seed(seed)
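        # collect N episodes with a random behaviour policy and feed each transition to the transfer module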
        for _ in range(N):
            s = test_env.reset()
            done = False
            while not done:
                if hasattr(test_env, "action_space_executable"):
                    a = np.random.choice(test_env.action_space_executable())
                else:
                    a = test_env.action_space.sample()
                s_, r_, done, info = test_env.step(a)
                tm.push(s, a, r_, s_, done, info)
                s = s_
        tm.update()
        errors_test.append(tm.errors)

    print("================================================ test ================================================")
    print(utils.array_to_cross_comparaison(errors_test, source_params, test_params))
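
The data-collection loop in Example #3 is a plain random-policy rollout over the classic (pre-0.26) Gym step API, which returns (state, reward, done, info). The self-contained sketch below reproduces the same pattern with a standard CartPole-v1 environment and a plain list in place of the project's TransferModule; it is an illustration of the loop, not part of the original code.

import gym

# Roll out a few episodes with a random behaviour policy and store the raw
# transitions, mirroring the loop above without the TransferModule.
env = gym.make("CartPole-v1")
env.seed(0)                              # old-style Gym seeding, as in the example
transitions = []
for _ in range(5):                       # N episodes
    s = env.reset()
    done = False
    while not done:
        a = env.action_space.sample()    # random action
        s_, r_, done, info = env.step(a)
        transitions.append((s, a, r_, s_, done, info))
        s = s_
print(len(transitions), "transitions collected")
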
Example #4
 def __init__(self,
              loss_function,
              gamma,
              feature=None,
              sources_params=None,
              evaluate_continuously=False,
              selection_method="best_fit",
              Q_sources=None,
              device=None,
              **kwargs):
     super().__init__(sources_params, evaluate_continuously, selection_method)
     self.memory = []
     self.Q_sources = Q_sources
     self.feature = feature
     self.device = device
     self.gamma = gamma
     self.loss_function = loss_fonction_factory(loss_function)
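
Example #4 stores gamma, a loss function and a set of source Q-functions, but the snippet does not show how they are combined. Purely as an illustration (not the project's actual update rule), a discounted TD target evaluated with such a loss would be computed along these lines; all tensors below are toy values.

import torch
import torch.nn as nn

loss_fn = nn.SmoothL1Loss()                  # stand-in for loss_fonction_factory(...)
gamma = 0.999

q_sa = torch.tensor([0.5, 1.2, -0.3])        # Q(s, a) for a small batch
r = torch.tensor([1.0, 0.0, 0.5])            # rewards
max_q_next = torch.tensor([0.8, 1.1, 0.0])   # max_a' Q(s', a') from a target or source network
not_done = torch.tensor([1.0, 1.0, 0.0])     # zero out bootstrapping on terminal steps

td_target = r + gamma * max_q_next * not_done
loss = loss_fn(q_sa, td_target)
print(float(loss))
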
Example #5
    def __init__(self,
                 device,
                 policy_network,
                 gamma=0.999,
                 batch_size_experience_replay=128,
                 target_update=10,
                 optimizer=None,
                 loss_function=None,
                 workspace=None,
                 lr=None,
                 weight_decay=None,
                 transfer_module=None,
                 writer=None,
                 **kwargs):
        self.weights_over_time = []
        self.biais_over_time = []
        self.best_fit_over_time = []
        self.ae_errors_over_time = []
        self.probas_over_time = []
        self.writer = writer
        self.tranfer_module = transfer_module
        self.device = device
        self.size_mini_batch = batch_size_experience_replay
        self.GAMMA = gamma
        self.TARGET_UPDATE = target_update
        self.workspace = workspace
        self.policy_net = policy_network.to(self.device)

        self.memory = Memory()
        self.i_episode = 0
        self.n_actions = self.policy_net.predict.out_features
        self.loss_function = loss_fonction_factory(loss_function)
        self.lr = lr
        self.weight_decay = weight_decay
        self.optimizer_type = optimizer
        self.optimizer = None
        self.reset()
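
Example #5 keeps a TARGET_UPDATE period and an episode counter (self.i_episode), but the update itself happens outside the snippet. The fields suggest the usual DQN-style pattern of periodically copying the online network's weights into a frozen target network; the sketch below shows that pattern with a toy nn.Linear standing in for policy_network, and is not taken from the project.

import copy
import torch.nn as nn

policy_net = nn.Linear(4, 2)                 # toy stand-in for the real policy network
target_net = copy.deepcopy(policy_net)
TARGET_UPDATE = 10

for i_episode in range(100):
    # ... collect experience and optimise policy_net here ...
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())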