def meta_training(self):
    # Interacting with the environment:
    # For initialize_memory, collect expert demonstrations via select_action_beta.
    if self.args.train:
        self.initialize_memory()

    # Train for at least this many episodes.
    print("Starting Main Training Procedure.")
    meta_counter = 0
    self.set_parameters(meta_counter)

    for e in range(self.number_episodes):
        # Maintain a counter to keep track of updating the policy regularly,
        # and to check whether we are exceeding the maximum number of timesteps.
        counter = 0

        # Reset environment.
        state = self.environment.reset()
        terminal = False

        # Within each episode, keep going until we terminate or reach the maximum number of timesteps.
        while not terminal and counter < self.max_timesteps:
            self.set_parameters(meta_counter)

            # SAMPLE ACTION FROM POLICY(STATE).
            # During training, select_action_beta mixes expert and policy actions.
            if self.args.train:
                action, expert_action = self.select_action_beta(state)
            else:
                action = self.select_action(state)

            # TAKE STEP WITH ACTION.
            next_state, onestep_reward, terminal, success = self.environment.step(action)

            # If the render flag is on, render the environment.
            if self.args.render:
                self.environment.render()

            if self.args.train:
                # STORE TRANSITION IN MEMORY WITH EXPERT ACTION.
                new_transition = Transition(state, expert_action, next_state, onestep_reward, terminal, success)
                self.memory.append_to_memory(new_transition)

                # UPDATE POLICY (could also be done less frequently than every step).
                self.policy_update(counter)

            state = copy.deepcopy(next_state)

            # Increment counters.
            counter += 1
            meta_counter += 1

            # Periodically save the model.
            if meta_counter % self.save_every == 0 and self.args.train:
                self.PolicyModel.save_model(meta_counter)
                print("Reached Iteration", meta_counter)
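# NOTE: a minimal sketch of two pieces meta_training() relies on but that are not shown
# here: the Transition container and a DAgger-style select_action_beta() that executes the
# expert action with probability self.annealed_beta and the learned policy otherwise.
# The field names, expert_policy, and PolicyModel.forward are assumptions for illustration,
# not the repository's actual definitions.

import numpy as np

class Transition(object):
    # Simple container for one environment step; instances are stored in the replay memory.
    def __init__(self, state, action, next_state, reward, terminal, success):
        self.state = state
        self.action = action
        self.next_state = next_state
        self.reward = reward
        self.terminal = terminal
        self.success = success

# (Method of the trainer class.)
def select_action_beta(self, state):
    # With probability annealed_beta execute the expert action, otherwise the learned policy.
    # The expert action is always returned as well, since that is what goes into memory.
    expert_action = self.expert_policy(state)          # assumed expert / oracle controller
    policy_action = self.PolicyModel.forward(state)    # assumed learned policy
    if np.random.random() < self.annealed_beta:
        return expert_action, expert_action
    return policy_action, expert_action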
def initialize_memory(self):
    # Now we are going to initialize the memory with a set number of demonstrations.
    self.number_demonstrations = 500

    # A transition must have: obs, action taken, next_state, reward, terminal?, success.
    self.max_timesteps = 200
    self.number_episodes = 0

    print("Starting Memory Burn In.")
    self.set_parameters(0)

    # For INITIALIZING MEMORY ALONE: set the beta value to 1, i.e. collect expert demonstrations.
    self.annealed_beta = 1.

    # While the number of episodes is less than the number of demonstrations.
    while self.number_episodes < self.number_demonstrations:
        # Start a new episode.
        counter = 0
        state = self.environment.reset()
        terminal = False
        episode = []

        while counter < self.max_timesteps and not terminal:
            # Retrieve action - with beta == 1, this will return (expert, expert).
            action, expert_action = self.select_action_beta(state)

            # Take a step in the environment.
            next_state, onestep_reward, terminal, success = self.environment.step(action)

            # If the render flag is on, render the environment.
            if self.args.render:
                self.environment.render()

            # Store in an instance of the Transition class.
            # Remember, here we are adding the EXPERT action to the memory.
            new_transition = Transition(state, expert_action, next_state, onestep_reward, terminal, success)

            # Do not append the transition to memory yet:
            # append it to the episode, then append the whole episode.
            episode.append(new_transition)

            # Copy next state into state.
            state = copy.deepcopy(next_state)

            # Increment counter.
            counter += 1

        # Append the new episode to memory.
        self.memory.append_to_memory(episode)
        self.number_episodes += 1

    self.max_timesteps = 2000
    print("Memory Burn In Complete.")
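# NOTE: a possible sketch of the beta-annealing schedule that set_parameters() appears to
# implement in this DAgger-style setup: beta starts at 1 (pure expert) during burn-in and
# decays linearly toward a floor as training proceeds. The attribute names anneal_iterations
# and final_beta, and the linear schedule itself, are assumptions, not the actual code.

# (Method of the trainer class.)
def set_parameters(self, iteration):
    # Linearly anneal beta from 1.0 down to final_beta over anneal_iterations steps.
    anneal_iterations = getattr(self, 'anneal_iterations', 50000)  # assumed default
    final_beta = getattr(self, 'final_beta', 0.)                   # assumed default
    fraction = min(float(iteration) / anneal_iterations, 1.)
    self.annealed_beta = 1. - (1. - final_beta) * fraction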
def initialize_memory(self):
    # The number of initial transitions needs to be less than the memory size.
    self.initial_transitions = 5000

    # A transition must have: obs, action taken, next_state, reward, terminal?, success.
    self.max_timesteps = 200

    print("Starting Memory Burn In.")
    self.set_parameters(0)

    # While the number of stored transitions is less than initial_transitions.
    while self.memory.memory_len < self.initial_transitions:
        # Start a new episode.
        counter = 0
        state = self.environment.reset()
        terminal = False

        while counter < self.max_timesteps and self.memory.memory_len < self.initial_transitions and not terminal:
            # Put in new transitions, using the beta-mixed action selection.
            action, expert_action = self.select_action_beta(state)

            # Take a step in the environment.
            next_state, onestep_reward, terminal, success = self.environment.step(action)

            # If the render flag is on, render the environment.
            if self.args.render:
                self.environment.render()

            # Store in an instance of the Transition class, with the EXPERT action.
            new_transition = Transition(state, expert_action, next_state, onestep_reward, terminal, success)

            # Append the new transition to memory.
            self.memory.append_to_memory(new_transition)

            # Copy next state into state.
            state = copy.deepcopy(next_state)

            # Increment counter.
            counter += 1

    self.max_timesteps = 2000
    print("Memory Burn In Complete.")
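# NOTE: a minimal sketch of the replay-memory interface used above (memory_len and
# append_to_memory). The class name ReplayMemory, the default capacity, and sample_batch()
# are assumptions; the episode-level variant of initialize_memory would additionally need
# append logic that accepts a whole list of transitions.

import random
from collections import deque

class ReplayMemory(object):
    def __init__(self, memory_size=100000):
        # A bounded FIFO buffer: once full, the oldest transitions are evicted.
        self.memory = deque(maxlen=memory_size)

    @property
    def memory_len(self):
        # Number of transitions currently stored.
        return len(self.memory)

    def append_to_memory(self, transition):
        self.memory.append(transition)

    def sample_batch(self, batch_size=64):
        # Uniformly sample a batch of stored transitions for a policy update.
        return random.sample(list(self.memory), min(batch_size, len(self.memory)))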
def __init__(self):
    self.FSM = StateMachine()

    # ----- Adding all states ---------
    self.FSM.add_states("Eligible", LoanEligibleCheckState("LoanEligibleState", self.FSM))
    self.FSM.add_states("Applied", AppliedState("AppliedState", self.FSM))
    self.FSM.add_states("DocVerified", DocumentVerifiedState("DocumentVerifiedState", self.FSM))
    self.FSM.add_states("PropertyCheck", ApplicantPropertyVerifiedState("ApplicantPropertyVerifiedState", self.FSM))
    self.FSM.add_states("Approved", LoanSanctionedState("LoanSanctionedState", self.FSM))
    self.FSM.add_states("Rejected", LoanRejectedState("LoanRejectedState", self.FSM))

    # ----- Adding all transitions --------
    self.FSM.add_transition("toApplied", Transition("Applied"))
    self.FSM.add_transition("toEligible", Transition("Eligible"))
    self.FSM.add_transition("toDocVerification", Transition("DocVerified"))
    self.FSM.add_transition("toPropertyCheck", Transition("PropertyCheck"))
    self.FSM.add_transition("toApprove", Transition("Approved"))
    self.FSM.add_transition("toRejected", Transition("Rejected"))

    # Start in the eligibility-check state.
    self.FSM.set_state("Eligible")
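# NOTE: a minimal sketch of the StateMachine and (FSM) Transition classes the constructor
# above assumes. Method names mirror the calls used there (add_states, add_transition,
# set_state); the to_transition()/execute() driver pattern and the requirement that each
# state object expose an execute() method are assumptions about the surrounding code.

class Transition(object):
    def __init__(self, to_state):
        # Name of the state this transition leads to.
        self.to_state = to_state

class StateMachine(object):
    def __init__(self):
        self.states = {}
        self.transitions = {}
        self.cur_state = None
        self.trans = None

    def add_states(self, state_name, state):
        self.states[state_name] = state

    def add_transition(self, trans_name, transition):
        self.transitions[trans_name] = transition

    def set_state(self, state_name):
        self.cur_state = self.states[state_name]

    def to_transition(self, trans_name):
        # Queue a transition to be applied on the next execute() call.
        self.trans = self.transitions[trans_name]

    def execute(self):
        # Apply any pending transition, then run the current state's logic.
        if self.trans:
            self.set_state(self.trans.to_state)
            self.trans = None
        self.cur_state.execute()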
def initialize_memory(self):
    # The number of initial transitions needs to be less than the memory size.
    self.initial_transitions = 5000

    # A transition must have: obs, action taken, next_state, reward, terminal?, success.
    self.max_timesteps = 500

    print("Starting Memory Burn In.")
    self.set_parameters(0)
    episode_counter = 0

    # While the number of stored transitions is less than initial_transitions.
    while self.memory.memory_len < self.initial_transitions:
        # Start a new episode.
        counter = 0
        eps_reward = 0.
        state = self.environment.reset()
        terminal = False

        # Create a list of transitions that represents the episode.
        episode_transition_list = []

        while counter < self.max_timesteps and self.memory.memory_len < self.initial_transitions and not terminal:
            # Put in new transitions, sampled uniformly from the action space.
            action = self.environment.action_space.sample()

            # Take a step in the environment.
            next_state, onestep_reward, terminal, success = self.environment.step(action)
            eps_reward += copy.deepcopy(onestep_reward)

            # Check for alternate (success-based) termination.
            memory_terminal, terminal = self.check_alternate_termination(next_state, terminal, success)

            # Store in an instance of the Transition class.
            new_transition = Transition(state, action, next_state, onestep_reward, memory_terminal, success)

            # Append the new transition to the episode LIST, not directly to memory.
            episode_transition_list.append(new_transition)

            # Copy next state into state.
            state = copy.deepcopy(next_state)

            # Increment counter.
            counter += 1

        # Now that the episode is done, change all the "desired goal" variables
        # to the goal actually achieved, then append each transition to memory.
        achieved_goal = copy.deepcopy(state['achieved_goal'])

        for k in range(len(episode_transition_list)):
            episode_transition_list[k].state['desired_goal'] = copy.deepcopy(achieved_goal)
            episode_transition_list[k].next_state['desired_goal'] = copy.deepcopy(achieved_goal)

            # Append into memory.
            self.memory.append_to_memory(episode_transition_list[k])

        # Print statistics.
        print("Episode: ", episode_counter, " Reward: ", eps_reward, " Counter:", counter, terminal)
        episode_counter += 1

    self.max_timesteps = 2000
    print("Memory Burn In Complete.")
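# NOTE: a hedged sketch of the hindsight relabeling step above, factored into a helper.
# It assumes the environment follows the gym GoalEnv convention, so the reward under the
# substituted goal can be recomputed with env.compute_reward(achieved_goal, desired_goal, info).
# The helper name relabel_episode and the reward recomputation are assumptions - the original
# loop only rewrites 'desired_goal' and keeps the rewards collected under the old goal.

import copy

# (Method of the trainer class.)
def relabel_episode(self, episode_transition_list, achieved_goal):
    # Substitute the achieved goal as the desired goal for every transition in the
    # episode (the "final" strategy of Hindsight Experience Replay) and store them.
    for transition in episode_transition_list:
        transition.state['desired_goal'] = copy.deepcopy(achieved_goal)
        transition.next_state['desired_goal'] = copy.deepcopy(achieved_goal)

        # Optionally recompute the reward under the new goal (gym GoalEnv API).
        transition.reward = self.environment.compute_reward(
            transition.next_state['achieved_goal'], achieved_goal, {})

        self.memory.append_to_memory(transition)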
def meta_training(self):
    # Interacting with the environment:
    # For initialize_memory, actions are just randomly sampled from the action space via env.action_space.sample().
    if self.args.train:
        self.initialize_memory()

    print("Starting Main Training Procedure.")
    meta_counter = 0
    episode_counter = 0
    self.set_parameters(meta_counter)

    for e in range(self.number_episodes):
        # Maintain a counter to keep track of updating the policy regularly,
        # and to check whether we are exceeding the maximum number of timesteps.
        counter = 0

        # Reset environment.
        state = self.environment.reset()
        terminal = False
        eps_reward = 0.
        memory_terminal = False

        # Create a list of transitions for the episode.
        episode_transition_list = []

        # Within each episode, keep going until we terminate or reach the maximum number of timesteps.
        while not terminal and counter < self.max_timesteps:
            self.set_parameters(meta_counter)

            # SAMPLE ACTION FROM POLICY(STATE).
            action = self.select_action(state)

            next_state, onestep_reward, terminal, success = self.environment.step(action)
            eps_reward += copy.deepcopy(onestep_reward)

            # Check for alternate (success-based) termination.
            memory_terminal, terminal = self.check_alternate_termination(next_state, terminal, success)

            # If the render flag is on, render the environment.
            if self.args.render:
                self.environment.render()

            if self.args.train:
                # STORE TRANSITION in the episode list; it is appended to memory after relabeling.
                new_transition = Transition(state, action, next_state, onestep_reward, memory_terminal, success)
                episode_transition_list.append(new_transition)

                # UPDATE POLICY (could also be done less frequently than every step).
                self.policy_update(meta_counter)
            else:
                print(action)

            state = copy.deepcopy(next_state)

            # Increment counters.
            counter += 1
            meta_counter += 1

            # Periodically save the model.
            if meta_counter % self.save_every == 0 and self.args.train:
                self.ACModel.save_model(meta_counter)
                print("Reached Iteration", meta_counter)

        # Relabel the desired goal of every transition with the goal actually achieved,
        # then append the episode to memory.
        achieved_goal = copy.deepcopy(state['achieved_goal'])

        for k in range(len(episode_transition_list)):
            episode_transition_list[k].state['desired_goal'] = copy.deepcopy(achieved_goal)
            episode_transition_list[k].next_state['desired_goal'] = copy.deepcopy(achieved_goal)
            self.memory.append_to_memory(episode_transition_list[k])

        print("Episode: ", episode_counter, " Reward: ", eps_reward, " Counter:", counter, terminal)
        episode_counter += 1
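# NOTE: a hedged sketch of check_alternate_termination(), which both HER-style methods above
# call but which is not shown here. The interpretation below - mark the stored transition as
# terminal whenever the goal is reached (memory_terminal) and end the episode early on
# success - is an assumption inferred from how its two return values are used.

# (Method of the trainer class.)
def check_alternate_termination(self, next_state, terminal, success):
    # memory_terminal is what gets written into the replay buffer; it flags the transition
    # as terminal if either the environment terminated or the goal was achieved.
    memory_terminal = terminal or bool(success)

    # End the episode as soon as the goal is achieved.
    if success:
        terminal = True
    return memory_terminal, terminal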