Example #1
    def __init__(self,path,memorySize,historySize,height,width,seed):

        self.path = path
        self.memorySize = memorySize
        self.historySize = historySize
        self.height = height
        self.width = width
        self.seed = seed
        self.learningRate = 0.00025
        self.gamma = 0.99
        self.loadModel = False
        self.n_actions = 4
        self.batchSize = 32

        self.Q_sum = 0
        self.Cost_sum = 0
        self.trainStart = False

        self.memory = Memory.Memory(path=self.path, size=self.memorySize, historySize=self.historySize, dims=[height, width],
                               seed=self.seed)

        with tf.device('/gpu:3'):
            with tf.variable_scope("train") as train_scope:
                self.Q_train = deepQNetwork.DeepQNetwork(self.height, self.width, self.historySize, self.n_actions, self.gamma, self.learningRate, self.seed,
                                                    trainable=True)
            with tf.variable_scope("target") as target_scope:
                self.Q_target = deepQNetwork.DeepQNetwork(self.height, self.width, self.historySize, self.n_actions, self.gamma, self.learningRate,
                                                     self.seed + 1, trainable=False)

        # self.saver = tf.train.Saver(max_to_keep=None)
        # if self.loadModel is True:
        #     self.saver.restore(self.sess, self.modelPath)

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
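
Example #1 builds a trainable "train" network and a frozen "target" network under separate variable scopes, but the snippet does not show how the target network is refreshed. In a standard DQN the train weights are periodically copied into the target network; the sketch below is one way to do that in TF 1.x, assuming the scope names "train" and "target" used above and matching variables in both scopes (the helper name is illustrative, not part of the original code).

import tensorflow as tf

def make_target_update_op(train_scope='train', target_scope='target'):
    # Collect the variables of each scope; sorting by name pairs them up,
    # assuming both networks were built with identical structure.
    train_vars = sorted(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=train_scope),
                        key=lambda v: v.name)
    target_vars = sorted(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=target_scope),
                         key=lambda v: v.name)
    # One grouped op that assigns every train variable onto its target counterpart.
    return tf.group(*[t.assign(s) for s, t in zip(train_vars, target_vars)])

# Typical use inside the agent, e.g. every few thousand training steps:
#     self.sess.run(make_target_update_op())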
Example #2
    # Greedy selection: the highest Q-value and the index of the best action along axis 1
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]


# cv2.namedWindow("Imagetest")
def Scale(img):
    # cv2.imshow("Imagetest", img.reshape([210, 160], order=0))
    # cv2.waitKey (100)
    # Downscale the frame and normalize pixel values to [0, 1]
    return cv2.resize(img, (width, height)) / 255.


with tf.device('/gpu:1'):
    with tf.variable_scope("train") as train_scope:
        Q_train = deepQNetwork.DeepQNetwork(height, width, historyLength,
                                            n_actions, gamma, learningRate,
                                            SEED)
    with tf.variable_scope("target") as target_scope:
        Q_target = deepQNetwork.DeepQNetwork(height, width, historyLength,
                                             n_actions, gamma, learningRate,
                                             SEED)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

saver = tf.train.Saver(max_to_keep=None)
if loadModel is True:
    saver.restore(sess, modelPath)

log = myLog.Log(logPath, 'w+')
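
The short fragment at the top of Example #2 returns the greedy action (the argmax over the Q-values) together with its value. During training a DQN normally wraps that greedy choice in an epsilon-greedy policy; the snippet below is a minimal sketch of that idea, not code from the original repository (the function name and arguments are illustrative).

import numpy as np

def epsilon_greedy(action_values, epsilon, rng=np.random):
    # With probability epsilon explore with a uniformly random action,
    # otherwise exploit by taking the action with the highest Q-value.
    n_actions = np.asarray(action_values).reshape(-1).shape[0]
    if rng.rand() < epsilon:
        return rng.randint(n_actions)
    return int(np.argmax(action_values))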
Example #3

# cv2.namedWindow("Imagetest")
def Scale(img):
    # cv2.imshow("Imagetest", img.reshape([210, 160], order=0))
    # cv2.waitKey (1)
    return (cv2.resize(img, (width, height))) / 255.


with tf.device('/gpu:3'):
    with tf.variable_scope("train") as train_scope:
        Q_train = deepQNetwork.DeepQNetwork(height,
                                            width,
                                            historyLength,
                                            n_actions,
                                            gamma,
                                            learningRate,
                                            SEED,
                                            trainable=True,
                                            data_format=dataFormat)

    with tf.variable_scope("target") as target_scope:
        Q_target = deepQNetwork.DeepQNetwork(height,
                                             width,
                                             historyLength,
                                             n_actions,
                                             gamma,
                                             learningRate,
                                             SEED,
                                             trainable=False,
                                             data_format=dataFormat)
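
Example #3 adds a data_format argument when the networks are built, which usually selects between a channels-last (NHWC) and a channels-first (NCHW) layout for the convolutional layers. The frames returned by Scale are 2-D, so the agent has to stack the recent history into whichever layout the network expects; the helper below is a minimal sketch of that step under those assumptions (its name and the exact data_format strings are not taken from the original code).

import numpy as np

def stack_history(frames, data_format='NHWC'):
    # frames: list of 2-D arrays of shape (height, width), oldest first.
    # NHWC -> (height, width, history); NCHW -> (history, height, width).
    axis = -1 if data_format == 'NHWC' else 0
    return np.stack(frames, axis=axis).astype(np.float32)

The listing that follows is a higher-level training routine built around an emulator wrapper, a Keras-based DeepNeuralNetwork, and a DeepQNetwork agent class.
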
def train(scenario,
          average_reward_episodes,
          rendering,
          hidden_layers,
          hidden_layers_size,
          memory_size,
          minibatch_size,
          optimizer_learning_rate,
          gamma,
          epsilon_decay_factor,
          maximum_episodes,
          model_file_name,
          converge_criteria=None,
          graphs_suffix='',
          seed=None,
          verbose=C_VERBOSE_NONE):
    '''
    Summary: 
        Trains a DQN model for solving the given OpenAI gym scenario.
    
    Args:
        scenario: string
            The OpenAI gym scenario to be solved.
        
        average_reward_episodes: int
            Over how many consecutive episodes the average reward should be calculated.
        
        rendering: boolean
            If True, OpenAI gym environment rendering is enabled. 
        
        hidden_layers: int
            The number of hidden layers of the Deep Neural Network, not including the
            input and output layers.
            
        hidden_layers_size: int
            The size of each hidden layer of the Neural Network.
        
        memory_size: int
            The size of the replay memory used by the DQN.
                
        minibatch_size: int
            The size of the minibatch retrieved randomly from the replay memory at each
            DQN iteration.
        
        optimizer_learning_rate: float
            The Adam optimizer learning rate used in the DNN.
        
        gamma: float
            The discount factor used in equation (3) of [1].
        
        epsilon_decay_factor: float
            The decay factor applied to the epsilon parameter at each iteration step.

        maximum_episodes: int
            The maximum number of episodes to be executed. If the DQN converges earlier, training stops.
        
        model_file_name: string
            The file in which the trained DQN model (a Keras DNN) should be saved.
            
        converge_criteria: int or None
            The DQN convergence criterion (when the average reward is > 200 for converge_criteria
            consecutive episodes, the DQN is assumed to have converged).
            If None, training continues until maximum_episodes is reached.
            
        graphs_suffix: string
            A suffix added to the graph file names, to be used when running multiple training sessions.
        
        seed: int
            Optional seed to be used with the OpenAI gym environment, for reproducibility of results.
                
        verbose: int
            Verbose level (0: None, 1: INFO, 2: DEBUG)
                
    Raises:
        -
    
    Returns:
        convergence_episode: int
            The episode in which the DQN converged.

        convergence_time: string (time)
            How long the DQN took to converge.

        Returns None if converge_criteria is None.

    Notes:
        -
    '''

    if verbose > C_VERBOSE_NONE:
        print('\nDQN Training Starts (scenario = ',
              scenario,
              ', average_reward_episodes = ',
              average_reward_episodes,
              ', rendering = ',
              rendering,
              ', hidden_layers = ',
              hidden_layers,
              ', hidden_layers_size = ',
              hidden_layers_size,
              ', memory_size = ',
              memory_size,
              ', minibatch_size = ',
              minibatch_size,
              ', optimizer_learning_rate = ',
              optimizer_learning_rate,
              ', gamma = ',
              gamma,
              ', epsilon_decay_factor = ',
              epsilon_decay_factor,
              ', maximum_episodes = ',
              maximum_episodes,
              ', model_file_name = ',
              model_file_name,
              ', converge_criteria = ',
              converge_criteria,
              ', graphs_suffix = ',
              graphs_suffix,
              ', seed = ',
              seed,
              ')',
              sep='')

    #If a seed is given, apply it
    if seed is not None:
        applySeed(seed, verbose)

    #Create an Emulator object instance
    emulator = em.Emulator(scenario,
                           average_reward_episodes,
                           statistics=True,
                           rendering=rendering,
                           seed=seed,
                           verbose=verbose)

    #Create a Deep Neural Network object instance (Keras with TensorFlow backend)
    dnn = deepNeuralNetwork.DeepNeuralNetwork(
        inputs=emulator.state_size,
        outputs=emulator.actions_number,
        hidden_layers=hidden_layers,
        hidden_layers_size=hidden_layers_size,
        optimizer_learning_rate=optimizer_learning_rate,
        seed=seed,
        verbose=verbose)

    #Create a DQN object instance (we always start from epsilon = 1.0 and control its value with epsilon_decay_factor)
    dqn = deepQNetwork.DeepQNetwork(emulator=emulator,
                                    dnn=dnn,
                                    states_size=emulator.state_size,
                                    actions_number=emulator.actions_number,
                                    memory_size=memory_size,
                                    minibatch_size=minibatch_size,
                                    gamma=gamma,
                                    epsilon=1.0,
                                    epsilon_decay_factor=epsilon_decay_factor,
                                    seed=seed,
                                    verbose=verbose)

    #Start measuring training time
    start_time = time.time()

    if converge_criteria is not None:
        #Holds for how many consecutive episodes the average reward has been > 200
        convergence_counter = 0
        episodes_convergence_counter = []  #Holds the convergence_counter for all episodes
        convergence_episode = 0

    #Training starts here
    for i in range(maximum_episodes):
        current_state = emulator.start()

        #See Algorithm 1 in [1]
        while emulator.emulator_started:
            action = dqn.decideAction(current_state)

            #Experience [s, a, r, s']
            experience = emulator.applyAction(action)

            dqn.storeTransition(experience)
            dqn.sampleRandomMinibatch()

            #s = s' at the end of the step, before starting the new step
            current_state = experience[3]

        if converge_criteria is not None:
            #Check whether the convergence counter should be increased or reset
            if emulator.average_reward > 200:
                convergence_counter += 1
            else:
                convergence_counter = 0

            episodes_convergence_counter.append(convergence_counter)

            if verbose > C_VERBOSE_NONE:
                print('Convergence Counter: ', convergence_counter, sep='')

            #DQN model is assumed to have converged
            if convergence_counter >= converge_criteria:
                convergence_episode = i
                break

    if converge_criteria is not None:
        convergence_time = time.time() - start_time

    if verbose > C_VERBOSE_NONE and converge_criteria is not None:
        print('\nDQN converged after ',
              convergence_episode,
              ' episodes in ',
              executionTimeToString(convergence_time),
              sep='')
    elif verbose > C_VERBOSE_NONE and converge_criteria is None:
        print('\nDQN trained for ',
              maximum_episodes,
              ' episodes in ',
              executionTimeToString(time.time() - start_time),
              sep='')

    #Create Graphs
    #1. Steps per Episode
    plt.plot(emulator.execution_statistics.values[:, 0],
             emulator.execution_statistics.values[:, 1],
             color='coral',
             linestyle='-')
    plt.grid(b=True, which='major', axis='y', linestyle='--')
    plt.xlabel('Episode', fontsize=12)
    plt.ylabel('Steps', fontsize=12)
    plt.title('Steps per Episode', fontsize=12)
    plt.savefig('Steps_Per_Episode' + graphs_suffix + '.png')
    plt.clf()

    #2. Total Reward per Training Episode
    plt.plot(emulator.execution_statistics.values[:, 0],
             emulator.execution_statistics.values[:, 2],
             color='coral',
             linestyle='-',
             label='Total Reward')
    plt.plot(emulator.execution_statistics.values[:, 0],
             emulator.execution_statistics.values[:, 3],
             color='midnightblue',
             linestyle='--',
             label='Episodes Reward Average')
    plt.grid(b=True, which='major', axis='y', linestyle='--')
    plt.xlabel('Episode', fontsize=12)
    plt.ylabel('Reward', fontsize=12)
    plt.title('Total Reward per Training Episode', fontsize=12)
    plt.legend(loc='lower right', fontsize=12)
    plt.savefig('Total_Reward_Per_Training_Episode' + graphs_suffix + '.png')
    plt.clf()

    #Save the trained model
    dnn.saveModel(model_file_name)

    if converge_criteria is not None:
        return convergence_episode
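
Given the parameter descriptions in the docstring, a call to train() might look like the sketch below. All of the values are illustrative assumptions rather than settings taken from the original project; the > 200 average-reward criterion suggests a gym scenario such as 'LunarLander-v2', but that scenario name is an inference.

# Hypothetical invocation; every value here is an illustrative assumption.
convergence_episode = train(scenario='LunarLander-v2',
                            average_reward_episodes=100,
                            rendering=False,
                            hidden_layers=2,
                            hidden_layers_size=64,
                            memory_size=100000,
                            minibatch_size=64,
                            optimizer_learning_rate=0.001,
                            gamma=0.99,
                            epsilon_decay_factor=0.995,
                            maximum_episodes=2000,
                            model_file_name='dqn_model.h5',
                            converge_criteria=100,
                            graphs_suffix='_run1',
                            seed=1,
                            verbose=1)  # verbose level 1: INFO, per the docstring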