Example #1
    def load_model(self, model, env_shape, action_shape, **kwargs):
        input_layer = Input(shape=env_shape)
        m = model(input_layer)
        output = Dense(action_shape, activation='linear')(m)

        self.model = Model(input_layer, output, name='dqn_model')

        # create loss
        # delta is the value at which the Huber loss transitions
        # from a quadratic function to a linear function
        delta = kwargs.get('loss_delta', 1.0)
        loss_function = kwargs.get('loss_function', 'huber')
        if loss_function == 'huber':
            loss = Huber(delta=delta)
        elif loss_function == 'mse':
            loss = MeanSquaredError()
        else:
            loss = Huber(delta=delta)

        # create optimizer
        LR = kwargs.get('LR', 0.001)               # learning rate
        LR_decay1 = kwargs.get('LR_decay1', 0.9)   # exponential decay rate for the 1st moment estimates (beta_1)
        LR_decay2 = kwargs.get('LR_decay2', 0.999) # exponential decay rate for the 2nd moment estimates (beta_2)
        optimizer = Adam(learning_rate=LR, beta_1=LR_decay1, beta_2=LR_decay2)

        self.model.compile(optimizer=optimizer,
                           loss=loss,
                           metrics=kwargs.get('metrics'))
        print(self.model.summary())
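As a quick reference for the loss_delta kwarg above: tf.keras.losses.Huber is quadratic for errors up to delta and linear beyond it. A minimal, illustrative check (numbers are arbitrary):

import tensorflow as tf

huber = tf.keras.losses.Huber(delta=1.0)
# |error| <= delta: 0.5 * error^2               -> 0.5 * 0.5^2 = 0.125
# |error| >  delta: delta * (|error| - delta/2) -> 1.0 * (3.0 - 0.5) = 2.5
print(float(huber(tf.constant([0.0]), tf.constant([0.5]))))  # 0.125
print(float(huber(tf.constant([0.0]), tf.constant([3.0]))))  # 2.5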
Example #2
    def learn(self, gamma: float, frame_number: int, priority_scale: float=1.0) -> Tuple[float, np.ndarray]:
        if self.use_per:
            (states, actions, rewards, new_states, terminal), importance, indices = self.replay_buffer.get_minibatch(self.batch_size, priority_scale=priority_scale)
            importance = importance ** (1-self.calc_epsilon(frame_number))
        else:
            states, actions, rewards, new_states, terminal = self.replay_buffer.get_minibatch(self.batch_size, priority_scale=priority_scale)
        
        best_actions = self.main_q.predict(new_states).argmax(axis=1)  # actions chosen by the online network (Double DQN)
        
        future_q_values = self.target_q.predict(new_states)
        double_q = future_q_values[np.arange(self.batch_size), best_actions]  # evaluate the chosen actions with the target network
        
        target_q = rewards + (gamma * double_q * (1 - terminal.astype(np.float32)))  # drop the bootstrap term on terminal transitions
        
        with GradientTape() as tape:
            q_values = self.main_q(states)
            
            # mask the Q-values with one-hot encoded actions so only the taken action contributes
            one_hot_actions = to_categorical(actions, self.n_actions, dtype=np.float32)
            Q = tf.reduce_sum(tf.multiply(q_values, one_hot_actions), axis=1)
            
            error = Q - target_q
            if self.use_per:
                # weight each transition's Huber loss by its PER importance;
                # reduction NONE keeps one loss value per sample
                per_sample_loss = Huber(reduction=tf.keras.losses.Reduction.NONE)(
                    tf.expand_dims(target_q, -1), tf.expand_dims(Q, -1))
                loss = tf.reduce_mean(per_sample_loss * importance)
            else:
                loss = Huber()(target_q, Q)

        gradients = tape.gradient(loss, self.main_q.trainable_variables)
        self.main_q.optimizer.apply_gradients(zip(gradients, self.main_q.trainable_variables))
        
        if self.use_per:
            self.replay_buffer.set_priorities(indices, error)
        
        return float(loss.numpy()), error
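For reference, a minimal NumPy sketch (made-up numbers) of the Double-DQN target computed above: actions are selected by the online network, evaluated by the target network, and the bootstrap term is dropped on terminal transitions:

import numpy as np

gamma = 0.99
rewards = np.array([1.0, 0.0])
terminal = np.array([False, True])
online_q_next = np.array([[0.2, 0.9], [0.5, 0.1]])   # Q_main(s', .)
target_q_next = np.array([[0.3, 0.7], [0.4, 0.2]])   # Q_target(s', .)

best_actions = online_q_next.argmax(axis=1)              # [1, 0]
double_q = target_q_next[np.arange(2), best_actions]     # [0.7, 0.4]
targets = rewards + gamma * double_q * (1 - terminal)    # [1.693, 0.0]
print(targets)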
Example #3
    def __init__(
        self, 
        # main_q: Model, 
        # target_q: Model,
        # replay_memory: ReplayMemory,
        n_actions: int,
        input_shape: Tuple = (84, 84),
        batch_size: int=32,
        history_length: int=4,
        learning_rate: float=0.00001,
        eps_initial: int=1,
        eps_final: float=0.1,
        eps_final_frame: float=0.0,
        eps_evaluation: float=0.0,
        eps_annealing_frames: int=1000000,
        replay_buffer_size: int = 1000000,
        replay_buffer_start_size: int=50000,
        max_frames: int=25000000,
        use_per: bool=True) -> None:

        self.n_actions = n_actions
        self.input_shape = input_shape
        self.history_length = history_length
        self.learning_rate = learning_rate
        self.replay_buffer_start_size = replay_buffer_start_size
        self.max_frames = max_frames
        self.batch_size = batch_size
        # self.replay_buffer = replay_memory
        self.use_per = use_per
        self.eps_initial = eps_initial
        self.eps_final = eps_final
        self.eps_final_frame = eps_final_frame
        self.eps_evaluation = eps_evaluation
        self.eps_annealing_frames = eps_annealing_frames
        self.replay_buffer_size = replay_buffer_size
        
        self.slope = -(self.eps_initial - self.eps_final) / self.eps_annealing_frames
        self.intercept = self.eps_initial - self.slope*self.replay_buffer_start_size
        self.slope_2 = -(self.eps_final - self.eps_final_frame) / (self.max_frames - self.eps_annealing_frames - self.replay_buffer_start_size)
        self.intercept_2 = self.eps_final_frame - self.slope_2*self.max_frames
        
        self.replay_buffer: ReplayMemory = ReplayMemory(
                                                size=self.replay_buffer_size,
                                                input_shape=self.input_shape,
                                                history_length=self.history_length,
                                                use_per=self.use_per)
        
        # self.main_q: Model = DuelingDQN(self.n_actions, self.input_shape, self.history_length)
        # self.target_q: Model = DuelingDQN(self.n_actions, self.input_shape, self.history_length)
        
        # self.main_q.build((self.input_shape[0], self.input_shape[1], self.history_length))
        # self.target_q.build((self.input_shape[0], self.input_shape[1], self.history_length))
        
        self.main_q = build_q_network(self.n_actions, self.input_shape, self.history_length)
        self.target_q = build_q_network(self.n_actions, self.input_shape, self.history_length)
        
        self.main_q.compile(optimizer=Adam(self.learning_rate), loss=Huber())
        self.target_q.compile(optimizer=Adam(self.learning_rate), loss=Huber())
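The slope/intercept pairs above define a two-segment linear epsilon schedule, but the calc_epsilon method itself is not shown. A sketch consistent with those attributes (an assumption, not the project's exact code):

    def calc_epsilon(self, frame_number: int, evaluation: bool = False) -> float:
        # Piecewise-linear schedule: eps_initial while the replay buffer fills,
        # then two linear decay segments ending at eps_final_frame.
        if evaluation:
            return self.eps_evaluation
        if frame_number < self.replay_buffer_start_size:
            return self.eps_initial
        if frame_number < self.replay_buffer_start_size + self.eps_annealing_frames:
            return self.slope * frame_number + self.intercept
        return self.slope_2 * frame_number + self.intercept_2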
Example #4
 def __init__(self,
              alpha=0.031157,
              delta=0.13907,
              epsilon_decay=0.99997,
              eta=0.044575,
              gamma=0.013082,
              learning_rate=0.050023):
     super().__init__("DQN")
     self.action_size = State.ACTION_SIZE
     self.state_size = State.STATE_SIZE
     self.memory_rl = PrioritizedReplayBuffer(2000000)
     self.memory_sl = SupervisedMemory(2000000)
     self.batch_size = 512
     self.model_update_frequency = 10
     self.model_save_frequency = 100
     self.alpha = alpha  # Pred opt: 0.7
     self.delta = delta  # Pred opt: 0.5
     self.epsilon = 1
     self.epsilon_min = 0.001
     self.epsilon_decay = epsilon_decay  # Pred opt: 0.99999
     self.gamma = gamma  # 0 # 0.029559  # Pred opt: 0.01
     self.learning_rate = learning_rate  # Pred opt: 0.1
     self.learning_rate_sl = 0.005
     self.eta = eta  # Pred opt: 0.1
     self.number_of_episodes = 1000
     self.reduce_lr = ReduceLROnPlateau(monitor='loss',
                                        factor=0.1,
                                        patience=5,
                                        min_lr=0)
     self.policy_network = self.build_model(
         self.learning_rate, 'linear',
         Huber(reduction=Reduction.SUM, delta=self.delta))
     self.target_network = self.build_model(
         self.learning_rate, 'linear',
         Huber(reduction=Reduction.SUM, delta=self.delta))
     self.target_network.set_weights(self.policy_network.get_weights())
     self.supervised_learning_network = self.build_model(
         self.learning_rate_sl, 'softmax',
         tf.keras.losses.sparse_categorical_crossentropy)
     self.total_rewards_p1 = []
     self.total_rewards_p2 = []
     self.losses = []
     self.steps = 0
     self.p2 = AgentPlaceholder()
     self.rounds = 0
     self.n_batches = 0
     self.save_model = True
     self.env = UnoEnvironment(False)
Example #5
    def build_model(self) -> Sequential:
        """
        Constructs and returns a Convolutional Deep-Q-Network
        """
        model = Sequential()

        model.add(
            Conv2D(filters=8,
                   kernel_size=4,
                   strides=(2, 2),
                   padding="valid",
                   activation="relu",
                   input_shape=self.image_shape))

        #model.add(Conv2D(filters=64, kernel_size=4, strides=(2,2),
        #                 padding="valid", activation="relu"))

        #model.add(Conv2D(filters=64, kernel_size=3, strides=(1,1),
        #                 padding="valid", activation="relu"))

        model.add(Flatten())
        model.add(Dense(units=128, activation="relu"))
        model.add(Dense(self.action_size))
        model.compile(loss=Huber(),
                      optimizer=Adam(learning_rate=0.01),
                      metrics=["accuracy"])
        return model
Example #6
    def __rossNet(self):
        '''
        Notes
        ------------
        Ref: https://doi.org/10.1029/2017JB015251 
        '''
        model = Sequential()
        model.add(Conv1D(
            32,
            21,
            activation='relu',
        ))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))

        model.add(Conv1D(64, 15, activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))

        model.add(Conv1D(128, 11, activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))

        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(512, activation='relu'))
        model.add(Dense(1, activation='linear'))

        model.compile(loss=Huber(), optimizer=Adam())

        return model
Example #7
File: model.py Project: TTomilin/GVizDoom
def dueling_dqn(input_shape: Tuple[int], action_size: int,
                learning_rate: float, noisy: bool) -> Model:
    # Build the convolutional network section and flatten the output
    state_input, x = build_base_cnn(input_shape, noisy)

    # Determine the type of the fully connected layer
    dense_layer = NoisyDense if noisy else Dense

    # State value tower - V
    state_value = dense_layer(256,
                              activation='relu',
                              kernel_initializer=he_uniform())(x)
    state_value = dense_layer(1, kernel_initializer=he_uniform())(state_value)
    state_value = Lambda(lambda s: K.expand_dims(s[:, 0], axis=-1),
                         output_shape=(action_size, ))(state_value)

    # Action advantage tower - A
    action_advantage = dense_layer(256,
                                   activation='relu',
                                   kernel_initializer=he_uniform())(x)
    action_advantage = dense_layer(
        action_size, kernel_initializer=he_uniform())(action_advantage)
    action_advantage = Lambda(
        lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
        output_shape=(action_size, ))(action_advantage)

    # Merge to state-action value function Q
    state_action_value = add([state_value, action_advantage])

    model = Model(inputs=state_input, outputs=state_action_value)
    model.compile(loss=Huber(), optimizer=Adam(lr=learning_rate))
    return model
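This example and the next both combine the two streams as Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). A small NumPy illustration with made-up numbers:

import numpy as np

value = np.array([[2.0]])                  # V(s), shape (batch, 1)
advantage = np.array([[1.0, -1.0, 0.5]])   # A(s, a), shape (batch, actions)
q = value + (advantage - advantage.mean(axis=1, keepdims=True))
print(q)  # [[2.833..., 0.833..., 2.333...]]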
Example #8
    def dueling_build_model(input_dimension, output_dimension, nodes_per_layer, hidden_layer_count, learning_rate):
        inputs = Input(shape=(input_dimension,))

        # Build Advantage layer
        advantage_hidden_layer = inputs
        for _ in range(hidden_layer_count):
            advantage_hidden_layer = Dense(nodes_per_layer, activation='relu')(advantage_hidden_layer)
        predictions_advantage = Dense(output_dimension, activation='linear')(advantage_hidden_layer)

        # Build Value layer
        value_hidden_layer = inputs
        for _ in range(hidden_layer_count):
            value_hidden_layer = Dense(nodes_per_layer, activation='relu')(value_hidden_layer)
        predictions_value = Dense(1, activation='linear')(value_hidden_layer)

        # Combine layers
        advantage_average = Lambda(mean)(predictions_advantage)

        advantage = Subtract()([predictions_advantage, advantage_average])

        predictions = Add()([advantage, predictions_value])

        model = Model(inputs=inputs, outputs=predictions)
        model.compile(optimizer=Adam(lr=learning_rate), loss=Huber())
        return model
Example #9
    def __init__(self,
                 name,
                 alpha,
                 gamma,
                 input_layer_size,
                 number_of_parameters,
                 out_layer_size,
                 memory_size=10000,
                 batch_size=32):

        config = configparser.ConfigParser()
        config.read('configuration/agent_config.ini')
        config.sections()

        enable_model_load = config['model_weights'].getboolean(
            'enable_load_model_weights')
        self.enable_model_save = config['model_weights'].getboolean(
            'enable_save_model_weights')
        self.tensorboard_visualization = config['tensorboard'].getboolean(
            'enable_dqn')

        # Hyperparameters
        self.memory = MemoryBuffer(max_size=memory_size,
                                   number_of_parameters=number_of_parameters)
        self.gamma = gamma
        self.learning_rate = alpha
        self.batch_size = batch_size
        self.out_layer_size = out_layer_size
        self.action_space = [i for i in range(out_layer_size)]
        loss = Huber()
        optimizer = 'adam'

        # Epsilon Greedy Strategy
        self.epsilon = 1.0  # enable epsilon = 1.0 only when changing model, else learned weights from .h5 are used.
        self.epsilon_decay = 0.9985
        self.epsilon_min = 0.005

        # Keras Models
        hl1_dims = 128
        hl2_dims = 64
        hl3_dims = 64

        self.dqn_eval = self._build_model(hl1_dims, hl2_dims, hl3_dims,
                                          input_layer_size, out_layer_size,
                                          optimizer, loss)

        if self.tensorboard_visualization:
            comment = 'adam-huber-reward_per'
            path = config['tensorboard']['file_path']
            tboard_name = '{}{}-cmt-{}_hl1_dims-{}_hl2_dims-{}-time-{}'.format(
                path, name, comment, hl1_dims, hl2_dims, int(time.time()))
            self.tensorboard = TensorBoard(tboard_name.format())

        self.keras_weights_filename = '{}.keras'.format(name)
        self.model_loaded = False

        if enable_model_load:
            self.load_model()
        else:
            print('Applying epsilon greedy strategy')
Example #10
 def __init__(self, obs_size=4, action_size=2, lr=0.001): # TODO
     self.model = Sequential()
     self.model.add(Dense(16, activation='relu', input_dim=obs_size))
     self.model.add(Dense(16, activation='relu'))
     self.model.add(Dense(16, activation='relu'))
     self.model.add(Dense(action_size, activation='linear'))
     self.model.compile(loss=Huber(), optimizer=Adam(lr=lr))
Example #11
def learning(model, code, train_data, valid_data):
    global FILE_PATH
    loss = Huber()
    optimizer = Adam(0.0005)
    model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])
    earlystopping = EarlyStopping(monitor='val_loss', patience=10)
    foldername = FILE_PATH + '/' + code + '/'
    filename = os.path.join(foldername, code + '.ckpt')

    checkpoint = ModelCheckpoint(filename,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_loss',
                                 verbose=1)

    print('IN learning, print model',
          model)  # written for debugging (to check that the add_layer function works correctly)
    for data in train_data.take(1):
        print('train_data X shape( BATCH_SIZE, WINDOW_SIZE, feature ) :',
              data[0].shape)
        print('train_data Y shape( BATCH_SIZE, WINDOW_SIZE, feature ) :',
              data[1].shape)
    for data in valid_data.take(1):
        print('valid_data X shape( BATCH_SIZE, WINDOW_SIZE, feature ) :',
              data[0].shape)
        print('valid_data Y shape( BATCH_SIZE, WINDOW_SIZE, feature ) :',
              data[1].shape)

    history = model.fit(train_data,
                        validation_data=(valid_data),
                        epochs=200,
                        callbacks=[checkpoint, earlystopping])

    print('End learning !')
Example #12
    def regression_loss(self, gt_bboxes, pred_bboxes, positive_mask):
        """
        Apply regression function to the classes in order to compute the localization loss

        Parameters
        ----------
        gt_bboxes: ground truth bboxes
        pred_bboxes: predicted bboxes
        positive_mask: boolean mask for positive examples

        Return
        ------
        l_loc: localization loss
        """
        if self.regression_type == 'smooth_l1':
            localization_loss = \
                Huber(delta=1.0, reduction='sum')
            l_loc = localization_loss(gt_bboxes[positive_mask],
                                      pred_bboxes[positive_mask])
        elif self.regression_type == 'DIoU' or self.regression_type == 'CIoU':
            pred_bboxes = decode_boxes(self.default_boxes,
                                       pred_bboxes)[positive_mask]
            gt_bboxes = decode_boxes(self.default_boxes,
                                     gt_bboxes)[positive_mask]
            l_loc = self.iou_loss(gt_bboxes, pred_bboxes)
        return l_loc
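As a sanity check on the 'smooth_l1' branch: Huber with delta=1.0 matches the smooth-L1 penalty (0.5 * x^2 for |x| < 1, |x| - 0.5 otherwise). Illustrative values only:

import tensorflow as tf

smooth_l1 = tf.keras.losses.Huber(delta=1.0, reduction='sum')
print(float(smooth_l1(tf.constant([[0.0]]), tf.constant([[0.4]]))))  # 0.5 * 0.4^2 = 0.08
print(float(smooth_l1(tf.constant([[0.0]]), tf.constant([[2.0]]))))  # 2.0 - 0.5   = 1.5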
Example #13
    def _build_model(self):
        """
        Builds a CNN model that will be used by the agent to predict Q-values.

        Returns
        -------
        A compiled Keras model with Adam Optimizer and Huber loss.
        """
        input = Input(shape=self._STATE_SPACE)
        x = Sequential([
            Conv2D(filters=32,
                   kernel_size=(8, 8),
                   strides=(4, 4),
                   padding='valid',
                   activation='relu',
                   kernel_initializer=VarianceScaling(scale=2.0)),
            Conv2D(filters=64,
                   kernel_size=(4, 4),
                   strides=(2, 2),
                   activation='relu',
                   padding='valid',
                   kernel_initializer=VarianceScaling(scale=2.0)),
            Conv2D(filters=64,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   activation='relu',
                   padding='valid',
                   kernel_initializer=VarianceScaling(scale=2.0)),
            Conv2D(filters=1024,
                   kernel_size=(7, 7),
                   strides=(1, 1),
                   activation='relu',
                   padding='valid',
                   kernel_initializer=VarianceScaling(scale=2.0)),
        ])(input)

        value_tensor, advantage_tensor = Lambda(
            lambda x: tf.split(x, 2, axis=3))(x)

        value_tensor = Flatten()(value_tensor)
        advantage_tensor = Flatten()(advantage_tensor)

        advantage = Dense(
            self._NUM_ACTIONS,
            kernel_initializer=VarianceScaling(scale=2.0))(advantage_tensor)
        value = Dense(
            1, kernel_initializer=VarianceScaling(scale=2.0))(value_tensor)

        mean_advantage = Lambda(
            lambda x: tf.reduce_mean(x, axis=1, keepdims=True))(advantage)
        normalized_advantage = Subtract()([advantage, mean_advantage])

        output = Add()([value, normalized_advantage])

        model = Model(inputs=input, outputs=output)
        optimizer = Adam(1e-5)
        loss = Huber(delta=1.0)
        model.compile(optimizer=optimizer, loss=loss)
        return model
Example #14
def get_best_learningrate(model):
    lr_scheduler = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch: 1e-8 * 10**(epoch / 20))
    model.compile(loss=Huber(), optimizer=SGD(lr=1e-8), metrics=['mae'])
    history = model.fit(training_set, epochs=100, callbacks=[lr_scheduler])
    lrs = 1e-8 * 10**(np.arange(100) / 20)  # the swept learning rates, from 1e-8 up to ~1e-3 (unused in the plot below)

    plt.semilogx(history.history['lr'], history.history['loss'])
    plt.show()
Example #15
File: model.py Project: huyyi/bishe
def construct_model():
    # weather prediction branch
    weather_in = Input(shape=(360, 17), name='weather_in')
    # use L2 regularization to prevent overfitting
    rglrz = l2(1e-4)

    # stack of three Bidirectional LSTM layers
    lstm = Bidirectional(LSTM(32,
                              kernel_regularizer=rglrz,
                              recurrent_regularizer=rglrz,
                              bias_regularizer=rglrz,
                              recurrent_initializer='glorot_uniform',
                              return_sequences=True,
                              name='lstm1'),
                         merge_mode='concat',
                         name='Bid_1')(weather_in)

    lstm = Bidirectional(LSTM(32,
                              kernel_regularizer=rglrz,
                              recurrent_regularizer=rglrz,
                              bias_regularizer=rglrz,
                              recurrent_initializer='glorot_uniform',
                              return_sequences=True,
                              name='lstm2'),
                         merge_mode='concat',
                         name='Bid_2')(lstm)

    lstm = Bidirectional(LSTM(32,
                              kernel_regularizer=rglrz,
                              recurrent_regularizer=rglrz,
                              bias_regularizer=rglrz,
                              recurrent_initializer='glorot_uniform',
                              return_sequences=False,
                              name='lstm3'),
                         merge_mode='concat',
                         name='Bid_3')(lstm)

    # fully connected layer produces the prediction
    prediction = Dense(1,
                       kernel_initializer='glorot_uniform',
                       kernel_regularizer=rglrz,
                       name='prediction')(lstm)

    # geographic location branch
    county_in = Input(shape=(88, ), name='county_in')
    county = Dense(32, activation='tanh', name='county1')(county_in)
    county = Dense(1, activation='sigmoid', name='county2')(county)

    # merge the outputs of the two branches by multiplication
    pre_county = Multiply(name='pre_county')([prediction, county])
    merge_model = Model(inputs=[weather_in, county_in],
                        outputs=pre_county,
                        name='merge_model')

    # compile
    merge_model.compile(optimizer=Adam(), loss=Huber(), metrics=[mse])
    return merge_model
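A minimal usage sketch under the input shapes declared above; the random data and batch size of 8 are arbitrary, and the module's own imports are assumed to be in scope:

import numpy as np

model = construct_model()
weather = np.random.rand(8, 360, 17).astype('float32')  # (batch, time steps, weather features)
county = np.random.rand(8, 88).astype('float32')        # (batch, location features)
print(model.predict([weather, county]).shape)            # (8, 1)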
Example #16
    def __init__(self, name, alpha, gamma, input_layer_size, number_of_parameters, out_layer_size, memory_size=50000,
                 batch_size=64):

        config = configparser.ConfigParser()
        config.read('configuration/agent_config.ini')
        config.sections()

        enable_model_load = config['model_weights'].getboolean('enable_load_model_weights')
        self.enable_model_save = config['model_weights'].getboolean('enable_save_model_weights')
        self.tensorboard_visualization = config['tensorboard'].getboolean('enable_ddqnper')

        # Hyperparameters
        self.memory = MemoryBuffer(max_size=memory_size, number_of_parameters=input_layer_size, with_per=True)
        self.with_per = True
        self.gamma = gamma
        self.learning_rate = alpha
        self.batch_size = batch_size
        self.out_layer_size = out_layer_size
        self.replace_target_network_after = config['model_settings'].getint('ddqn_replace_network_interval')
        self.action_space = [i for i in range(out_layer_size)]
        self.priority_offset = 0.1  # used for priority, as we do not want to have priority 0 samples
        self.priority_scale = 0.7  # priority exponent, as suggested by the PER paper

        loss = Huber()
        optimizer = Adam(learning_rate=alpha)

        # Epsilon Greedy Strategy
        self.epsilon = 1.0  # enable epsilon = 1.0 only when changing model, else learned weights from .h5 are used.
        self.epsilon_decay = 0.9985
        self.epsilon_min = 0.005

        # Keras Models
        hl1_dims = 128
        hl2_dims = 64
        hl3_dims = 64

        self.dqn_eval = self._build_model(hl1_dims, hl2_dims, hl3_dims, input_layer_size, out_layer_size, optimizer,
                                          loss)
        self.dqn_target = self._build_model(hl1_dims, hl2_dims, hl3_dims, input_layer_size, out_layer_size, optimizer,
                                            loss)
        # self.history = History()

        if self.tensorboard_visualization:
            comment = 'adam-huber-reward_per2'
            path = config['tensorboard']['file_path']
            tboard_name = '{}{}-cmt-{}_hl1_dims-{}_hl2_dims-{}_hl3_dims-{}-time-{}'.format(path, name, comment,
                                                                                           hl1_dims,
                                                                                           hl2_dims, hl3_dims,
                                                                                           int(time.time()))
            self.tensorboard = TensorBoard(tboard_name.format())

        self.keras_weights_filename = '{}.keras'.format(name)
        self.model_loaded = False
        if enable_model_load:
            self.load_model()
        else:
            print('Applying epsilon greedy strategy')
Example #17
File: dqn.py Project: auloin/connect-four
    def _build_model(self):
        model = Sequential()
        model.add(Flatten(input_shape=self.input_shape))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss=Huber(), optimizer=Adam(learning_rate=self.lr))

        return model
Example #18
 def __init__(self, img_shape: tuple, num_actions: int,
              learning_rate: float):
     super().__init__()
     self.img_shape = img_shape
     self.num_actions = num_actions
     self.learning_rate = learning_rate
     self.loss = Huber()
     self.optimizer = RMSprop(learning_rate=0.00025, rho=0.95, epsilon=0.01)
     self.internal_model = self.build_model()
Example #19
 def __init__(self, env: str, episodes=1000, alpha=0.01, gamma=0.9, alpha_decay_rate=0.9):
     self.env = Environment(env=env)
     self.episodes = episodes
     self.lr = ExponentialDecay(alpha, episodes, alpha_decay_rate)
     self.optimizer = Adam(self.lr)
     self.action_count, self.states_count = self.env.spaces_count()
     self.gamma = gamma
     self._net = ReinforcePolicyNet(action_count=self.action_count, states_count=self.states_count)
     self._model = ReinforcePolicyModel(self._net)
     self._agent = ReinforcePolicyAgent(env=self.env, model=self._model, gamma=gamma)
     self.huber_loss = Huber(reduction=tf.keras.losses.Reduction.SUM)
Example #20
def nvidia():

	"""
	Implementation of Nvidia's End-to-End Learning model for Self-driving cars
	"""

	global X_train, y_train

	# Model Design

	inputs = Input(shape=(160,320,3))
	cropped = Cropping2D(cropping=((64, 0), (0, 0)))(inputs)
	resized_input = Lambda(lambda image: tf.image.resize(image, (66,200)))(cropped)
	normalize_layer = LayerNormalization(axis=1)(resized_input)
	conv1 = Conv2D(filters=24, kernel_size=5, strides=(2,2), activation='relu')(normalize_layer)
	conv2 = Conv2D(filters=36, kernel_size=5, strides=(2,2), activation='relu')(conv1)
	conv3 = Conv2D(filters=48, kernel_size=5, strides=(2,2), activation='relu')(conv2)
	conv4 = Conv2D(filters=64, kernel_size=3, activation='relu')(conv3)
	conv5 = Conv2D(filters=64, kernel_size=3, activation='relu')(conv4)
	flatten = Flatten()(conv5)
	dense1 = Dense(100,activation='relu')(flatten)
	dense2 = Dense(50,activation='relu')(dense1)
	dense3 = Dense(10,activation='relu')(dense2)
	out = Dense(1, activation='linear')(dense3)

	# Specifications and training

	checkpoint = ModelCheckpoint(filepath="./ckpts/model_nvidia.h5", monitor='val_loss', save_best_only=True)
	stopper = EarlyStopping(monitor='val_loss', min_delta=0.0003, patience = 10)

	lr_schedule = ExponentialDecay(initial_learning_rate=0.0001, decay_steps=100000, decay_rate=0.95)
	optimizer = Adam(learning_rate=lr_schedule)
	loss = Huber(delta=0.5, reduction="auto", name="huber_loss")
	t2 = time()
	model = Model(inputs=inputs, outputs=out)
	model.compile(loss = loss, optimizer = optimizer)
	result = model.fit(X_train, y_train, validation_split = 0.2, shuffle = True,
		epochs = 100, callbacks=[checkpoint, stopper])

	# Visualization of loss variations across epochs

	plt.plot(result.history['loss'])
	plt.plot(result.history['val_loss'])
	plt.title('Huber Loss')
	plt.ylabel('Loss')
	plt.xlabel('Epoch')
	plt.legend(['Training set', 'Validation set'], loc = 'upper right')
	plt.savefig('loss.png')
	plt.show()

	print("Time taken to train: {:.2f}s".format(time()-t2))

	model.load_weights('./ckpts/model_nvidia.h5')
	model.save('model.h5')
Example #21
File: model.py Project: TTomilin/GVizDoom
def dqn(input_shape: Tuple[int], action_size: int, learning_rate: float,
        noisy: bool) -> Model:
    # Build the convolutional network section and flatten the output
    state_input, state_hidden = build_base_cnn(input_shape, noisy)

    dense_layer = NoisyDense if noisy else Dense
    output = dense_layer(action_size, activation='linear')(state_hidden)

    model = Model(inputs=state_input, outputs=output)
    model.compile(loss=Huber(), optimizer=Adam(lr=learning_rate))
    return model
Example #22
def train_spatial_temporal_model(
        model,
        dataset_generator,
        opt='adam',
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        include_tb=False):  # validation_data, val_steps = VALIDATION_STEPS,

    ## Early stopping
    earlystopping = EarlyStopping(monitor='loss',
                                  min_delta=0.00001,
                                  patience=10,
                                  restore_best_weights=True)  # val_loss

    # Automatically save latest best model to file
    filepath = repo_path + "models/model_saves/" + PRED_TAR + '/' + RUN_ID + ".hdf5"
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='loss',
                                 verbose=0,
                                 save_best_only=True,
                                 mode='min')

    # Set callbacks
    callbacks_list = [checkpoint, earlystopping]

    # Include tensorboard
    if include_tb:
        tensorboard_cb = tf.keras.callbacks.TensorBoard(get_run_logdir())
        callbacks_list.extend([tensorboard_cb])

    # Optimizers
    optimizers = {
        'adam': Adam(learning_rate=0.001,
                     beta_1=0.9,
                     beta_2=0.999,
                     amsgrad=False)
    }

    model.compile(loss='mean_absolute_error',
                  optimizer=optimizers[opt],
                  metrics=[mae, RootMeanSquaredError(),
                           Huber()])

    # Fit model #x = [spatial_train, temporal_train_x], y = temporal_train_y,
    history = model.fit(
        dataset_generator,
        epochs=epochs,
        use_multiprocessing=True,
        # validation_data = validation_data, validation_steps = val_steps,
        steps_per_epoch=steps_per_epoch,
        verbose=1,
        callbacks=callbacks_list)
    return (history)
Example #23
    def _build_model(self):
        state_size = self.state_size
        action_size = self.action_size
        layers = self.options['layers']
        # Neural Net for Deep-Q learning Model
        model = Sequential()

        model.add(Dense(layers[0], input_dim=state_size, activation='relu'))
        for l in layers[1:]:  # the first layer was already added above with input_dim
            model.add(Dense(l, activation='relu'))
        model.add(Dense(action_size, activation='linear'))
        model.compile(loss=Huber(), optimizer=Adam(lr=self.options['alpha']))
        return model
Example #24
 def model(self):
     model = Sequential()
     model.add(Conv2D(filters=32, kernel_size=8, strides=4, activation='relu',
                      input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)))
     model.add(Conv2D(filters=64, kernel_size=4, strides=2, activation='relu'))
     model.add(Conv2D(filters=64, kernel_size=3, strides=1, activation='relu'))
     model.add(Flatten())
     model.add(Dense(units=512, activation='relu'))
     model.add(Dense(units=number_of_actions, activation='linear'))
     opt = RMSprop(learning_rate=LEARNING_RATE, rho=GRADIENT_MOMENTUM, epsilon=MIN_SQUARED_GRADIENT)
     model.compile(loss=Huber(), optimizer=opt)
     print(model.summary())
     return model
Example #25
 def vanilla_build_model(input_dimension, output_dimension, nodes_per_layer, hidden_layer_count, learning_rate):
     inputs = Input(shape=input_dimension)
     hidden_layer = inputs
     for _ in range(hidden_layer_count):
         hidden_layer = Dense(nodes_per_layer, activation='relu')(hidden_layer)
         # TODO explore batchnorm in RL.
         #hidden_layer = BatchNormalization()(hidden_layer)
     predictions = Dense(output_dimension, activation='linear')(hidden_layer)
     model = Model(inputs=inputs, outputs=predictions)
     # TODO do more testing on MSE vs Huber
     #model.compile(optimizer=keras.optimizers.Adam(lr=learning_rate, epsilon=1.5e-4), loss=tf.keras.losses.Huber())
     model.compile(optimizer=Adam(lr=learning_rate), loss=Huber())
     return model
Example #26
def create_lstm_network(env, *_, lr, layer_nodes, time_steps):
    """ Builds an LSTM DQN network fitting the environment's state/action spaces.
    """
    state_size = env.observation_space.shape[0]
    nA = env.action_space.n

    model = Sequential()
    model.add(LSTM(layer_nodes[0], input_shape=(time_steps, state_size)))
    for num_nodes in layer_nodes[1:]:
        model.add(Dense(num_nodes, activation='relu'))
    model.add(Dense(nA, activation='linear'))
    model.compile(loss=Huber(), optimizer=Adam(lr=lr), loss_weights=1.0)

    return model
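A usage sketch assuming a Gym-style environment; CartPole-v1 and the layer sizes here are illustrative only:

import gym

env = gym.make('CartPole-v1')
model = create_lstm_network(env, lr=1e-3, layer_nodes=[64, 32], time_steps=4)
model.summary()  # expects input batches of shape (batch, 4, observation_dim)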
Example #27
File: rl_agent.py Project: gandroz/rl-taxi
    def compile(self, optimizer=None, loss=Huber()):
        # lr_schedule = ExponentialDecay(
        #                         initial_learning_rate=self.config.learning_rate,
        #                         decay_steps=self.config.lr_decay_steps,
        #                         decay_rate=self.config.lr_decay,
        #                         lr_min=self.config.lr_min)
        # optimizer = Adam(learning_rate=lr_schedule)
        if optimizer is None:
            optimizer = Adam(learning_rate=self.config.learning_rate)

        self.target_model = clone_model(self.model)
        self.target_model.compile(optimizer='sgd', loss='mse')

        self.model.compile(loss=loss,
                           optimizer=optimizer,
                           metrics=['accuracy'])
Example #28
 def __get_loss(loss, num_of_classes):
     if loss == 'cross_entropy':  # default value.
         return CategoricalCrossentropy()  # if num_of_classes != 2 else BinaryCrossentropy()
     elif loss == 'binary_cross_entropy':
         return BinaryCrossentropy()
     elif loss == 'cosine_similarity':
         return CosineSimilarity()
     elif loss == 'mean_absolute_error':
         return MeanAbsoluteError()
     elif loss == 'mean_squared_error':
         return MeanSquaredError()
     elif loss == 'huber':
         return Huber()
     else:
         raise ValueError('loss type does not exist.')
Example #29
 def __init__(self, alpha=0.034657,
              delta=0.20752,
              epsilon_decay=0.99991,
              eta=0.096408,
              gamma=0.077969,
              learning_rate=0.00849):
     super().__init__(alpha,
                      delta,
                      epsilon_decay,
                      eta,
                      gamma,
                      learning_rate)
     self.policy_network = DuelingQNetwork(self.action_size)
     self.target_network = DuelingQNetwork(self.action_size)
     self.policy_network.compile(optimizer=keras.optimizers.Adam(), loss=Huber(reduction=Reduction.SUM))
     self.update_network()
Example #30
def learning(model, code, train_input, train_output, valid_input, valid_output):
    global BATCH_SIZE, FILE_PATH

    loss = Huber()
    optimizer = Adam(0.0005)
    model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])
    early_stop = EarlyStopping(monitor='val_loss', patience=10)
    filename = os.path.join(FILE_PATH, code+'.h5')
    checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

    history = model.fit(train_input, train_output, 
                        epochs=200, 
                        batch_size=BATCH_SIZE,
                        validation_data=(valid_input, valid_output), 
                        callbacks=[early_stop, checkpoint])

    print('End Learning !\n')