trans_cost = 0  # The cost of a single transaction (expressed in pips)
batch_size = 30  # The number of (state, action, reward, next_state) tuples to save before replay
                 # (a minimal replay sketch follows evaluate() below)
stop_loss_value = -50  # The maximum loss that we can handle (expressed in pips)
performance_file_path = "performance/train_performance.txt"  # Path where the training performance log file is stored
log = "performance/train_log.txt"  # Path where the training log file is stored
models_path = "models/"  # Path where the models are stored
n_prev_iterations = len(next(os.walk(models_path))[2])  # Get the number of existing models in models_path

setup(seed_value=7)

# ********************************* Creating the Agent Model and the Environment Model *********************************
env = Environment(ds_path=ds_path, window_size=window_size, pip_pos=pip_pos,
                  stop_loss=stop_loss_value, trans_cost=trans_cost)
actions = env.get_actions()  # Getting the available actions of the environment
agent = Agent(env.get_state_size(), env.get_actions_n())

if os.path.exists(performance_file_path):  # Checking if previous training performances were saved
    os.remove(performance_file_path)  # Deleting the old training performance file
if os.path.exists(log):  # Checking if a previous training log was saved
    os.remove(log)  # Deleting the old training log file

print(dt.now())
print("stop loss:", stop_loss_value)
print("pc: BH")

# ********************************************* Looping over all Episodes **********************************************
for ep in range(n_episodes - n_prev_iterations):
def evaluate(model_name):
    time_start = dt.now()
    model = load_model(model_name)  # Load the NN-agent model
    state_size = model.layers[0].input.shape.as_list()[1]  # Load the state size from the model
    window_size = int(state_size / 2)
    env = Environment(ds_path=ds_path, window_size=window_size, pip_pos=pip_pos,
                      stop_loss=stop_loss_value, trans_cost=trans_cost)
    actions = env.get_actions()  # Getting the available actions of the environment
    actions_size = env.get_actions_n()  # Getting the number of actions available in the environment
    agent = Agent(state_size=state_size, action_size=actions_size, is_eval=True, model_name=model_name)

    state, reward = env.step("Hold")  # Make a first neutral action to get the first state
    total_revenue = 0
    while not env.done:  # Loop until we finish all the instances
        action = agent.act(state)  # The agent chooses an action based on the current state
        next_state, reward = env.step(actions[action])  # Getting the next state and reward based on the chosen action
        # with open(log, "a+") as file:
        #     file.write(str(actions[action]) + "\n")  # Saving the action on a file
        #     if env.stop_loss_triggered:
        #         file.write("Stop Loss Triggered!" + "\n")  # Saving the stop loss taken on a file
        #     file.write(str(reward) + "\n")  # Saving the reward on a file
        '''print(colored("Observation:", 'blue'), state)
        print(colored("Action:", 'yellow'), actions[action])
        if env.stop_loss_triggered:  # Alert when we got a stop loss from the environment
            print(colored('Stop loss triggered!', 'red'))
        print(colored("Next Observation:", 'blue'), next_state)
        print(colored("Reward:", 'cyan'), reward)'''
        total_revenue += reward
        # agent.memory.append((state, action, reward, next_state))  # Saving the experience
        state = next_state
        # if len(agent.memory) > batch_size:  # Making an analysis based on our experience
        #     agent.exp_replay(batch_size)

    # ***************************** Showing and Saving the Results over a Single Episode *******************************
    # print("-------------------------------------------------------------------------------------------------------")
    if total_revenue > 0:
        print(colored("Total Profit: ", 'blue'), colored(str(round(total_revenue, 1)), 'cyan'), "pips")
    else:
        print(colored("Total Profit: ", 'blue'), colored(str(round(total_revenue, 1)), 'red'), "pips")
    with open(performance_file_path, "a+") as file:
        file.write(str(round(total_revenue, 1)) + "\n")  # Saving the performance on a file
    time_stop = dt.now()
    print(colored("Execution time for this episode:", 'yellow'),
          round((time_stop - time_start).total_seconds(), 0), "seconds")
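
# A minimal sketch of the experience-replay step that `batch_size` above and the commented-out
# `agent.exp_replay(batch_size)` calls refer to. This is not the repository's Agent code: the
# deque-based memory, `gamma`, and the Keras-style `model.predict`/`model.fit` usage are assumptions
# made to illustrate a plain DQN-style replay.
import random
from collections import deque

import numpy as np


class ReplaySketch:
    def __init__(self, gamma=0.95, memory_size=10000):
        self.memory = deque(maxlen=memory_size)  # Stores (state, action, reward, next_state) tuples
        self.gamma = gamma  # Discount factor applied to future rewards

    def exp_replay(self, model, batch_size):
        # Sample a random mini-batch of past transitions and fit the network on each one
        for state, action, reward, next_state in random.sample(self.memory, batch_size):
            # Bootstrapped target: immediate reward plus the discounted best future Q-value
            target = reward + self.gamma * np.max(model.predict(next_state, verbose=0)[0])
            target_q = model.predict(state, verbose=0)  # Current Q-values, shape (1, n_actions)
            target_q[0][action] = target  # Only the taken action's Q-value is corrected
            model.fit(state, target_q, epochs=1, verbose=0)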
def main():
    """
    Sets the parameters for the Environment, Critic, and Actor according to the imported config file.
    Creates an environment where a predefined number of episodes can be performed.
    Instantiates an actor to keep track of the policy, and a critic to keep track of the value of each state.
    Runs a predefined number of episodes, creating a new simulation for each episode. For each episode, the
    actor and the critic are updated according to the Actor-Critic model. Finally, epsilon is set to zero,
    and the environment runs a final simulation with the updated policy.
    """
    env = Environment(env_cfg)
    granularity = env_cfg["granularity"]
    critic = Critic(critic_cfg, granularity)
    actor = Actor(actor_cfg)
    episodes = training_cfg["number_of_episodes"]
    visualize_episodes = training_cfg["visualize_episodes"]
    steps_per_episode = []
    for episode in tqdm(range(episodes), desc=f"Playing {episodes} episodes", colour='#39ff14'):
        env.new_simulation()
        path = []
        positions = []
        critic.reset_eli_dict()
        actor.reset_eli_dict()
        while not env.reached_top() and not env.reached_max_steps():
            env.update_steps()
            current_state = copy(env.get_state())
            legal_actions = env.get_actions()
            action = actor.get_action(state=current_state, legal_actions=legal_actions)
            path.append((str(current_state), str(action)))
            reward = env.perform_action(action=action)
            td_err = critic.compute_td_err(current_state=current_state, next_state=env.get_state(), reward=reward)
            # Previous states on the path are updated as well during the call to train() by eligibility traces
            critic.train(state=current_state, td_error=td_err)
            critic.update_eligs()
            # Update actor beliefs on SAPs for all pairs seen thus far in the episode
            for i, sap in enumerate(reversed(path)):
                actor.update_eli_dict(state=str(sap[0]), action=str(sap[1]), i=i)
                actor.update_policy_dict(state=str(sap[0]), action=str(sap[1]), td_err=td_err)
            positions.append(env.get_position())
        print("steps used in this episode", env.steps)
        if episode in visualize_episodes:
            env.visualize_landscape(positions)
        steps_per_episode.append(env.steps)
    plot_learning(steps_per_episode)

    # Enable history tracking to visualize the final simulation
    env.new_simulation()
    print(f"Actor final epsilon: {actor.epsilon}")
    actor.epsilon = 0  # Set exploration to 0
    print("Attempting final simulation to show you how smart I am now")
    while not env.reached_top() and not env.reached_max_steps():
        current_state = env.get_state()
        legal_actions = env.get_actions()
        action = actor.get_action(current_state, legal_actions)
        env.perform_action(action)
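
# A minimal sketch of the TD(lambda) bookkeeping that `compute_td_err`, `train`, and `update_eligs`
# above are expected to perform. The table-based value function and the `gamma`/`lambda_`/`lr`
# parameters are illustrative assumptions, not the repository's Critic class.
from collections import defaultdict


class TableCriticSketch:
    def __init__(self, gamma=0.9, lambda_=0.9, lr=0.1):
        self.values = defaultdict(float)  # V(s), defaults to 0 for unseen states
        self.eligs = defaultdict(float)   # e(s), one eligibility trace per visited state
        self.gamma = gamma                # Discount factor
        self.lambda_ = lambda_            # Trace-decay rate
        self.lr = lr                      # Learning rate

    def compute_td_err(self, current_state, next_state, reward):
        # delta = r + gamma * V(s') - V(s)
        return reward + self.gamma * self.values[next_state] - self.values[current_state]

    def train(self, state, td_error):
        # Mark the current state as fully eligible, then nudge every traced state by lr * delta * e(s)
        self.eligs[state] = 1.0
        for s in self.eligs:
            self.values[s] += self.lr * td_error * self.eligs[s]

    def update_eligs(self):
        # Decay all traces: e(s) <- gamma * lambda * e(s)
        for s in self.eligs:
            self.eligs[s] *= self.gamma * self.lambda_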