def __init__(self, id, device, policy_network, gamma=0.999, batch_size_experience_replay=128,
             target_update=10, optimizer=None, loss_function=None, loss_function_gate=None,
             ratio_learn_test=None, workspace=None, lr=None, weight_decay=None,
             transfer_module=None, writer=None, info=None, transfer_param_init=None,
             feature=None, **kwargs):
    """Store the configuration, move the network to ``device`` and call ``reset()``.

    Args:
        id: Identifier stored as ``self.id``.
        device: Target passed to ``policy_network.to``; also stored as ``self.device``.
        policy_network: Network object; the result of ``policy_network.to(device)``
            is kept as ``self.full_net``. It must expose ``predict.out_features``,
            which is read to set ``self.n_actions``.
        gamma: Stored as ``self.gamma`` (presumably the discount factor -- confirm).
        batch_size_experience_replay: Stored as ``self.size_mini_batch``.
        target_update: Stored as ``self.target_update``.
        optimizer: Stored as ``self.optimizer_type``; not instantiated here.
        loss_function: Passed through ``loss_fonction_factory``; result stored
            as ``self.loss_function``.
        loss_function_gate: Passed through ``loss_fonction_factory``; result
            stored as ``self.loss_function_gate``.
        ratio_learn_test: Stored as ``self.ratio_learn_test``.
        workspace: Stored as ``self.workspace``.
        lr: Stored as ``self.lr``.
        weight_decay: Stored as ``self.weight_decay``.
        transfer_module: Stored as ``self.tm``.
        writer: Stored as ``self.writer``.
        info: Optional dict stored as ``self.info``. When omitted (or ``None``)
            a new empty dict is created for this instance.
        transfer_param_init: Optional mapping stored as
            ``self.transfer_param_init``. When ``None``, a dict with keys
            ``"w"`` and ``"b"`` is drawn from ``np.random.random_sample``.
        feature: Stored as ``self.feature``.
        **kwargs: Accepted and ignored.
    """
    self.id = id
    # Build a fresh dict per instance: a literal ``{}`` default in the signature
    # would be one shared object mutated by every instance that omits ``info``.
    self.info = {} if info is None else info
    self.tm = transfer_module
    self.feature = feature

    if transfer_param_init is None:
        # "w" is drawn before "b"; keep this order so seeded runs stay reproducible.
        self.transfer_param_init = {
            "w": np.random.random_sample(1)[0],
            "b": np.random.random_sample(1)[0],
        }
    else:
        self.transfer_param_init = transfer_param_init

    self.ratio_learn_test = ratio_learn_test
    self.loss_function_gate = loss_fonction_factory(loss_function_gate)

    # Per-run history buffers, all starting empty.
    self.best_fit_over_time = []
    self.ae_errors_over_time = []
    self.error_bootstrap_source = []
    self.error_bootstrap_partial = []
    self.p_over_time = []

    self.writer = writer
    self.device = device
    self.size_mini_batch = batch_size_experience_replay
    self.gamma = gamma
    self.target_update = target_update
    self.workspace = workspace

    self.full_net = policy_network.to(self.device)
    # The number of actions is read from the output width of the ``predict`` layer.
    self.n_actions = self.full_net.predict.out_features
    self.loss_function = loss_fonction_factory(loss_function)

    # Only the optimizer settings are stored here; nothing is instantiated.
    self.lr = lr
    self.weight_decay = weight_decay
    self.optimizer_type = optimizer

    self.reset()
def main(loss_function_str, optimizer_str, weight_decay, learning_rate, normalize, autoencoder_size, n_epochs,
         feature_autoencoder_info, workspace, device, type_ae="AEA", N_actions=None, writer=None):
    """Fit one autoencoder per sample set found under ``workspace / "samples"`` and save each.

    Args:
        loss_function_str: Passed to ``loss_fonction_factory`` to obtain the loss.
        optimizer_str: Passed to ``optimizer_factory`` for every autoencoder.
        weight_decay: Forwarded to ``optimizer_factory``.
        learning_rate: Forwarded to ``optimizer_factory`` as ``lr``.
        normalize: Forwarded to ``AutoEncoder.fit``.
        autoencoder_size: Pair unpacked into ``(min_n, max_n)``.
        n_epochs: Forwarded to ``AutoEncoder.fit``.
        feature_autoencoder_info: Passed to ``build_feature_autoencoder``.
        workspace: Base path; must support the ``/`` operator
            (presumably a ``pathlib.Path`` -- confirm with callers).
        device: Forwarded to the ``AutoEncoder`` constructor.
        type_ae: When equal to ``"AEA"`` the output width is the input width
            multiplied by ``N_actions``; otherwise it equals the input width.
        N_actions: Forwarded to ``utils.read_samples_for_ae`` and used as the
            output multiplier for ``"AEA"``.
        writer: Forwarded to ``AutoEncoder.fit``.
    """
    import torch

    loss_function = loss_fonction_factory(loss_function_str)
    makedirs(workspace)
    feature = build_feature_autoencoder(feature_autoencoder_info)
    min_n, max_n = autoencoder_size
    all_transitions = utils.read_samples_for_ae(workspace / "samples", feature, N_actions)

    # Build every model up front, one per sample set.
    autoencoders = []
    for transitions in all_transitions:
        n_features = transitions.X.shape[1]
        autoencoders.append(
            AutoEncoder(
                n_in=n_features,
                n_out=n_features * (N_actions if type_ae == "AEA" else 1),
                min_n=min_n,
                max_n=max_n,
                device=device,
            )
        )

    path_auto_encoders = workspace / "ae"
    makedirs(path_auto_encoders)

    print("learning_rate", learning_rate)
    print("optimizer_str", optimizer_str)
    print("weight_decay", weight_decay)

    for ienv, (autoencoder, transitions) in enumerate(zip(autoencoders, all_transitions)):
        autoencoder.reset()
        optimizer = optimizer_factory(
            optimizer_str,
            autoencoder.parameters(),
            lr=learning_rate,
            weight_decay=weight_decay,
        )
        # The minibatch size equals the number of rows, i.e. one batch per epoch.
        autoencoder.fit(
            transitions,
            size_minibatch=transitions.X.shape[0],
            n_epochs=n_epochs,
            optimizer=optimizer,
            normalize=normalize,
            stop_loss=0.01,
            loss_function=loss_function,
            writer=writer,
        )
        path_autoencoder = path_auto_encoders / "{}.pt".format(ienv)
        logger.info("saving autoencoder at {}".format(path_autoencoder))
        torch.save(autoencoder, path_autoencoder)
def main(loss_autoencoders_str, feature_autoencoder_info, target_envs, N, path_models, path_samples, seed,
         source_params, device, N_actions):
    """Print transfer-module errors for stored memories and for random rollouts on target envs.

    First, every memory loaded from ``path_samples`` is pushed into a freshly
    reset ``TransferModule`` and its ``errors`` are collected ("base").
    Then, for every environment produced by ``generate_envs(**target_envs)``,
    ``N`` episodes are played with randomly sampled actions, the transitions
    are pushed into the module and its ``errors`` are collected ("test").
    Both collections are printed through ``utils.array_to_cross_comparaison``.

    Args:
        loss_autoencoders_str: Passed to ``loss_fonction_factory``.
        feature_autoencoder_info: Passed to ``build_feature_autoencoder``.
        target_envs: Mapping expanded as keyword arguments to ``generate_envs``.
        N: Number of episodes played per test environment.
        path_models: Passed to ``utils.load_autoencoders``.
        path_samples: Passed to ``load_memories`` (with ``as_json=False``).
        seed: When not ``None``, passed to each test environment's ``seed``.
        source_params: Forwarded to ``TransferModule`` and used as row and
            column labels in the cross comparisons.
        device: Forwarded to ``utils.load_autoencoders`` and ``TransferModule``.
        N_actions: Forwarded to ``TransferModule``.
    """

    def _random_action(env):
        # Use the env's own list of executable actions when it provides one.
        if hasattr(env, "action_space_executable"):
            return np.random.choice(env.action_space_executable())
        return env.action_space.sample()

    loss_autoencoders = loss_fonction_factory(loss_autoencoders_str)
    autoencoders = utils.load_autoencoders(path_models, device)
    feature_autoencoder = build_feature_autoencoder(feature_autoencoder_info)
    tm = TransferModule(
        autoencoders=autoencoders,
        loss_autoencoders=loss_autoencoders,
        feature_autoencoders=feature_autoencoder,
        device=device,
        N_actions=N_actions,
        sources_params=source_params,
    )

    # --- errors on the stored source memories ---
    errors_base = []
    for stored in load_memories(path_samples, as_json=False):
        tm.reset()
        tm.push_memory(stored.memory)
        tm.update()
        errors_base.append(tm.errors)

    print("================================================ base ================================================")
    print(errors_base)
    print(utils.array_to_cross_comparaison(errors_base, source_params, source_params))

    # --- errors on random rollouts in the target environments ---
    test_envs, test_params = generate_envs(**target_envs)
    errors_test = []
    for test_env in test_envs:
        tm.reset()
        if seed is not None:
            test_env.seed(seed)
        for _ in range(N):
            state = test_env.reset()
            while True:
                action = _random_action(test_env)
                next_state, reward, done, info = test_env.step(action)
                tm.push(state, action, reward, next_state, done, info)
                state = next_state
                if done:
                    break
        tm.update()
        errors_test.append(tm.errors)

    print("================================================ test ================================================")
    print(utils.array_to_cross_comparaison(errors_test, source_params, test_params))
def __init__(self, loss_function, gamma, feature=None, sources_params=None, evaluate_continuously=False,
             selection_method="best_fit", Q_sources=None, device=None, **kwargs):
    """Initialise the base class, then store this module's own settings.

    Args:
        loss_function: Passed through ``loss_fonction_factory``; the result is
            stored as ``self.loss_function``.
        gamma: Stored as ``self.gamma``.
        feature: Stored as ``self.feature``.
        sources_params: Forwarded to the parent constructor.
        evaluate_continuously: Forwarded to the parent constructor.
        selection_method: Forwarded to the parent constructor.
        Q_sources: Stored as ``self.Q_sources``.
        device: Stored as ``self.device``.
        **kwargs: Accepted and ignored.
    """
    super().__init__(sources_params, evaluate_continuously, selection_method)

    # Starts with an empty memory list.
    self.memory = []

    self.gamma = gamma
    self.device = device
    self.feature = feature
    self.Q_sources = Q_sources

    self.loss_function = loss_fonction_factory(loss_function)
def __init__(self, device, policy_network, gamma=0.999, batch_size_experience_replay=128, target_update=10,
             optimizer=None, loss_function=None, workspace=None, lr=None, weight_decay=None,
             transfer_module=None, writer=None, **kwargs):
    """Store the configuration, move the network to ``device`` and call ``reset()``.

    Args:
        device: Target passed to ``policy_network.to``; also stored as ``self.device``.
        policy_network: Network object; the result of ``policy_network.to(device)``
            is kept as ``self.policy_net``. It must expose ``predict.out_features``,
            which is read to set ``self.n_actions``.
        gamma: Stored as ``self.GAMMA``.
        batch_size_experience_replay: Stored as ``self.size_mini_batch``.
        target_update: Stored as ``self.TARGET_UPDATE``.
        optimizer: Stored as ``self.optimizer_type``; ``self.optimizer`` itself
            is set to ``None`` here.
        loss_function: Passed through ``loss_fonction_factory``; result stored
            as ``self.loss_function``.
        workspace: Stored as ``self.workspace``.
        lr: Stored as ``self.lr``.
        weight_decay: Stored as ``self.weight_decay``.
        transfer_module: Stored as ``self.tranfer_module`` (attribute spelling
            kept as-is because other code may read it).
        writer: Stored as ``self.writer``.
        **kwargs: Accepted and ignored.
    """
    # Plain configuration values.
    self.device = device
    self.writer = writer
    self.workspace = workspace
    self.tranfer_module = transfer_module
    self.GAMMA = gamma
    self.TARGET_UPDATE = target_update
    self.size_mini_batch = batch_size_experience_replay
    self.lr = lr
    self.weight_decay = weight_decay
    self.optimizer_type = optimizer
    self.optimizer = None

    # History buffers, all starting empty.
    self.weights_over_time = []
    self.biais_over_time = []
    self.best_fit_over_time = []
    self.ae_errors_over_time = []
    self.probas_over_time = []
    self.i_episode = 0

    # Network, replay memory and loss (external calls kept in their original order).
    self.policy_net = policy_network.to(self.device)
    self.memory = Memory()
    self.n_actions = self.policy_net.predict.out_features
    self.loss_function = loss_fonction_factory(loss_function)

    self.reset()