# max_step - number of steps within an episode

# Number of experiments to run, and a zero-filled array of shape
# (max_n_ep, Experiments): one column per experiment.
# NOTE(review): presumably filled with per-episode rewards later -- confirm.
Experiments = 10
Experiments_All_Rewards = np.zeros((max_n_ep, Experiments))

# Settings handed unchanged to every BHN_Q_Network constructed below.
lbda = 1
perdatapoint = 0
prior = log_normal
coupling = 4

for e in range(Experiments):
    print("Experiment", e)

    # A new Q-network is constructed on every pass through the loop.
    value_function = BHN_Q_Network(
        lbda=lbda,
        perdatapoint=perdatapoint,
        prior=prior,
        coupling=coupling,
    )

    # Passed to the agent as `eps`.
    # NOTE(review): the original comment described this as the "decay rate
    # for the temperature parameter" -- confirm against AgentEpsGreedy.
    epsilon = 0.1
    discount = 0.9

    agent = AgentEpsGreedy(
        n_actions=n_actions,
        value_function_model=value_function,
        state_dim=state_dim,
        batch_size=batch_size,
        eps=epsilon,
    )
    memory = ReplayMemory(max_size=100000)

    # Per-experiment accumulators, reset to empty lists on each iteration.
    loss_per_ep, w1_m_per_ep = [], []
for e in range(Experiments):
    print("Experiment", e)

    # Shapes and coupling width forwarded to the network constructor; they
    # are rebuilt on every iteration so each network gets its own list.
    weight_shapes = [(512, 256), (256, 2)]
    coupling_dim = 256

    # A new Q-network is constructed on every pass through the loop.
    value_function = BHN_Q_Network(
        lbda=lbda,
        perdatapoint=perdatapoint,
        prior=prior,
        coupling=coupling,
        wn=WN,
        weight_shapes=weight_shapes,
        coupling_dim=coupling_dim,
    )

    # Passed to the agent as `eps`.
    # NOTE(review): the original comment described this as the "decay rate
    # for the temperature parameter" -- confirm against AgentEpsGreedy.
    epsilon = 0.1
    discount = 0.9

    agent = AgentEpsGreedy(
        n_actions=n_actions,
        value_function_model=value_function,
        state_dim=state_dim,
        batch_size=batch_size,
        eps=epsilon,
    )
    memory = ReplayMemory(max_size=100000)

    # Per-experiment accumulators, reset to empty lists on each iteration.
    loss_per_ep = []
    w1_m_per_ep, w2_m_per_ep, w3_m_per_ep = [], [], []