Example #1
	def meta_training(self):
		# Interacting with the environment: 
		# For initialize_memory, just randomly sample actions from the action space using env.action_space.sample().

		if self.args.train:
			self.initialize_memory()
		# Train for at least this many episodes.
		
		print("Starting Main Training Procedure.")
		meta_counter = 0
		self.set_parameters(meta_counter)

		for e in range(self.number_episodes):

			# Maintain counter to keep track of updating the policy regularly,
			# and to check whether we have exceeded the max number of timesteps.
			counter = 0			

			# Reset environment.
			state = self.environment.reset()
			terminal = False
						
			# Within each episode, just keep going until you terminate or we reach max number of timesteps. 
			while not(terminal) and counter<self.max_timesteps:

				self.set_parameters(meta_counter)

				# SAMPLE ACTION FROM POLICY(STATE)				
				# action = self.step_size*self.select_action_beta(state)
				if self.args.train:
					action, expert_action = self.select_action_beta(state)
				else: 
					action = self.select_action(state)

				# TAKE STEP WITH ACTION
				next_state, onestep_reward, terminal, success = self.environment.step(action)				
				# embed()
				# If render flag on, render environment.
				if self.args.render: 
					self.environment.render()				

				if self.args.train:
					# STORE TRANSITION IN MEMORY WITH EXPERT ACTION.
					new_transition = Transition(state, expert_action, next_state, onestep_reward, terminal, success)
					self.memory.append_to_memory(new_transition)

					# UPDATE POLICY (need to decide whether to do this at every step, or less frequently).
					self.policy_update(counter)

				state = copy.deepcopy(next_state)

				# Increment counter. 
				counter+=1
				meta_counter+=1 
				# Save the model every save_every iterations.
				if meta_counter%self.save_every==0 and self.args.train:
					self.PolicyModel.save_model(meta_counter)
					print("Reached Iteration",meta_counter)
Example #2
    def initialize_memory(self):

        # Now we are going to initialize the memory with a set number of demonstrations.
        self.number_demonstrations = 500
        # transition must have: obs, action taken, terminal?, reward, success, next_state

        self.max_timesteps = 200
        self.number_episodes = 0
        print("Starting Memory Burn In.")
        self.set_parameters(0)

        # For INITIALIZING MEMORY ALONE: set beta to 1, so that we collect pure expert demonstrations.
        self.annealed_beta = 1.

        # While number of episodes less than number of demonstrations.
        while self.number_episodes < self.number_demonstrations:

            # Start a new episode.
            counter = 0
            state = self.environment.reset()
            terminal = False

            episode = []

            while counter < self.max_timesteps and not (terminal):

                # Retrieve action; with beta == 1, this returns (expert_action, expert_action).
                action, expert_action = self.select_action_beta(state)

                # Take a step in the environment.
                next_state, onestep_reward, terminal, success = self.environment.step(
                    action)

                # If render flag on, render environment.
                if self.args.render:
                    self.environment.render()

                # Store in instance of transition class.
                # Remember, here we are adding EXPERT action to the memory.
                new_transition = Transition(state, expert_action, next_state,
                                            onestep_reward, terminal, success)

                # Do not append transition to memory yet.
                # Append to episode, then append episode.
                episode.append(new_transition)

                # Copy next state into state.
                state = copy.deepcopy(next_state)

                # Increment counter.
                counter += 1

            # Append new episode to memory.
            self.memory.append_to_memory(episode)
            self.number_episodes += 1

        self.max_timesteps = 2000
        print("Memory Burn In Complete.")
Example #3
    def initialize_memory(self):

        # Number of initial transitions needs to be less than memory size.
        self.initial_transitions = 5000
        # transition must have: obs, action taken, terminal?, reward, success, next_state

        # While memory isn't full:
        #while self.memory.check_full()==0:
        self.max_timesteps = 200
        print("Starting Memory Burn In.")
        self.set_parameters(0)

        # While number of transitions is less than initial_transitions.
        while self.memory.memory_len < self.initial_transitions:

            # Start a new episode.
            counter = 0
            state = self.environment.reset()
            terminal = False

            while counter < self.max_timesteps and self.memory.memory_len < self.initial_transitions and not (
                    terminal):

                # Put in new transitions.
                # action = self.environment.action_space.sample()
                # action = self.step_size*self.select_action_beta(state)
                action, expert_action = self.select_action_beta(state)
                # print(action)

                # Take a step in the environment.
                next_state, onestep_reward, terminal, success = self.environment.step(
                    action)

                # If render flag on, render environment.
                if self.args.render:
                    self.environment.render()

                # Store in instance of transition class.
                new_transition = Transition(state, expert_action, next_state,
                                            onestep_reward, terminal, success)

                # Append new transition to memory.
                self.memory.append_to_memory(new_transition)

                # Copy next state into state.
                state = copy.deepcopy(next_state)

                # Increment counter.
                counter += 1

        self.max_timesteps = 2000
        print("Memory Burn In Complete.")
Example #4
    def __init__(self):
        self.FSM = StateMachine()

        #-----adding all states---------
        self.FSM.add_states("Eligible",LoanEligibleCheckState("LoanEligibleState",self.FSM))
        self.FSM.add_states("Applied", AppliedState("AppliedState",self.FSM))
        self.FSM.add_states("DocVerified", DocumentVerifiedState("DocumentVerifiedState",self.FSM))
        self.FSM.add_states("PropertyCheck", ApplicantPropertyVerifiedState("ApplicantPropertyVerifiedState",self.FSM))
        self.FSM.add_states("Approved", LoanSanctionedState("LoanSanctionedState",self.FSM))
        self.FSM.add_states("Rejected", LoanRejectedState("LoanRejectedState",self.FSM))


        #-----adding all transitions--------
        self.FSM.add_transition("toApplied", Transition("Applied"))
        self.FSM.add_transition("toEligible", Transition("Eligible"))
        self.FSM.add_transition("toDocVerification", Transition("DocVerified"))
        self.FSM.add_transition("toPropertyCheck", Transition("PropertyCheck"))
        self.FSM.add_transition("toApprove", Transition("Approved"))
        self.FSM.add_transition("toRejected", Transition("Rejected"))

        self.FSM.set_state("Eligible")
Example #5
    def initialize_memory(self):

        # Number of initial transitions needs to be less than memory size.
        self.initial_transitions = 5000
        # transition must have: obs, action taken, terminal?, reward, success, next_state

        # While memory isn't full:
        #while self.memory.check_full()==0:
        self.max_timesteps = 500
        print("Starting Memory Burn In.")
        self.set_parameters(0)

        episode_counter = 0

        # While number of transitions is less than initial_transitions.
        while self.memory.memory_len < self.initial_transitions:

            # Start a new episode.
            counter = 0
            eps_reward = 0.
            state = self.environment.reset()
            terminal = False

            # Create a list of transitions that represents the episode.
            episode_transition_list = []

            while counter < self.max_timesteps and self.memory.memory_len < self.initial_transitions and not (
                    terminal):

                # Put in new transitions.
                action = self.environment.action_space.sample()
                # action = self.select_action_beta(state)

                # Take a step in the environment.
                next_state, onestep_reward, terminal, success = self.environment.step(
                    action)

                eps_reward += copy.deepcopy(onestep_reward)

                # # If render flag on, render environment.
                # if self.args.render:
                # 	self.environment.render()
                memory_terminal, terminal = self.check_alternate_termination(
                    next_state, terminal, success)

                # Store in instance of transition class.
                new_transition = Transition(state, action, next_state,
                                            onestep_reward, memory_terminal,
                                            success)

                # Append new transition to LIST, NOT MEMORY.
                episode_transition_list.append(new_transition)
                # self.memory.append_to_memory(new_transition)

                # Copy next state into state.
                state = copy.deepcopy(next_state)

                # Increment counter.
                counter += 1

            # Now that the episode is done,
            # Change all the "Desired goal" variables to the goal actually achieved.
            achieved_goal = copy.deepcopy(state['achieved_goal'])

            # Copy the actually achieved goal in as the desired goal for every transition in the episode,
            # then append each transition to the memory.
            for k in range(len(episode_transition_list)):
                episode_transition_list[k].state[
                    'desired_goal'] = copy.deepcopy(achieved_goal)
                episode_transition_list[k].next_state[
                    'desired_goal'] = copy.deepcopy(achieved_goal)

                # Append into memory.
                self.memory.append_to_memory(episode_transition_list[k])

            # Print statistics.
            print("Episode: ", episode_counter, " Reward: ", eps_reward,
                  " Counter:", counter, terminal)
            episode_counter += 1

        self.max_timesteps = 2000
        print("Memory Burn In Complete.")
Example #6
    def meta_training(self):
        # Interacting with the environment:

        # For initialize_memory, just randomly sample actions from the action space using env.action_space.sample().
        if self.args.train:
            self.initialize_memory()

        print("Starting Main Training Procedure.")
        meta_counter = 0
        episode_counter = 0
        self.set_parameters(meta_counter)

        for e in range(self.number_episodes):

            # Maintain counter to keep track of updating the policy regularly,
            # and to check whether we have exceeded the max number of timesteps.
            counter = 0

            # Reset environment.
            state = self.environment.reset()
            terminal = False
            eps_reward = 0.
            memory_terminal = False

            # Create list for episode.
            episode_transition_list = []

            # Within each episode, just keep going until you terminate or we reach max number of timesteps.
            while not (terminal) and counter < self.max_timesteps:

                self.set_parameters(meta_counter)

                # SAMPLE ACTION FROM POLICY(STATE)
                action = self.select_action(state)
                # action = self.select_action_beta(state)

                next_state, onestep_reward, terminal, success = self.environment.step(
                    action)

                eps_reward += copy.deepcopy(onestep_reward)

                memory_terminal, terminal = self.check_alternate_termination(
                    next_state, terminal, success)

                # If render flag on, render environment.
                if self.args.render:
                    self.environment.render()

                if self.args.train:
                    # STORE TRANSITION IN MEMORY.
                    new_transition = Transition(state, action, next_state,
                                                onestep_reward,
                                                memory_terminal, success)

                    episode_transition_list.append(new_transition)
                    # self.memory.append_to_memory(new_transition)

                    # UPDATE POLICY (need to decide whether to do this at every step, or less frequently).
                    self.policy_update(meta_counter)
                else:
                    print(action)

                state = copy.deepcopy(next_state)

                # Increment counter.
                counter += 1
                meta_counter += 1
                # Save the model every save_every iterations.
                if meta_counter % self.save_every == 0 and self.args.train:
                    self.ACModel.save_model(meta_counter)
                    print("Reached Iteration", meta_counter)

            achieved_goal = copy.deepcopy(state['achieved_goal'])

            for k in range(len(episode_transition_list)):
                episode_transition_list[k].state[
                    'desired_goal'] = copy.deepcopy(achieved_goal)
                episode_transition_list[k].next_state[
                    'desired_goal'] = copy.deepcopy(achieved_goal)

                self.memory.append_to_memory(episode_transition_list[k])

            print("Episode: ", episode_counter, " Reward: ", eps_reward,
                  " Counter:", counter, terminal)
            episode_counter += 1
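
Both meta_training variants call self.set_parameters(meta_counter) on every step, presumably to anneal schedule-dependent quantities such as the imitation coefficient annealed_beta (which Example #2 pins to 1 during burn-in). A minimal sketch of one plausible linear schedule; the horizon and endpoint values below are assumptions, not the author's actual settings:

def set_parameters(self, counter, anneal_iterations=50000):
    # Linearly anneal beta from 1 (always execute the expert action) down to 0
    # (always execute the learned policy) over anneal_iterations steps.
    if counter < anneal_iterations:
        self.annealed_beta = 1. - float(counter) / anneal_iterations
    else:
        self.annealed_beta = 0.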