) env = Learner( log_dir=log_dir, i_data=data[1]['train'], j_data=data[0]['train'], tst_data=data[1]['tst'], learning_steps=500, learning_steps_max=50000, ) with open(file_name, 'a') as file: file.write('\n\n===NEW DATA LOADING====\n\n') file.write('\n\ni_data: ' + str(1)) file.write('\n\nj_data: ' + str(3)) for i in range(5): train_DDQN(RL=RL_prio, env=env, file_name=file_name, penalty=0.02) # double_DQN.plot_cost() j = np.random.randint(4) env.i_data = data[j]['train'] env.reward_tst_data = data[j]['tst'] k = np.random.randint(4) env.j_data = data[k]['train'] with open(file_name, 'a') as file: file.write('\n\n===NEW DATA LOADING====\n\n') file.write('\n\ni_data: ' + str(j)) file.write('\n\nj_data: ' + str(k)) double_DQN.plot_cost() double_DQN.save_model('episode-3') env.destroy()