class PPOValueBrain:
    """Critic (state-value) network for PPO: a small MLP mapping a state to a scalar value."""

    def __init__(
        self,
        learning_rate: float = 0.0001,
        hidden_layers_count: int = 0,
        neurons_per_hidden_layer: int = 0,
    ):
        # tanh hidden layers; single *linear* output unit so the value estimate is unbounded.
        self.model = Sequential()
        for _ in range(hidden_layers_count):
            self.model.add(Dense(neurons_per_hidden_layer, activation=tanh))
        self.model.add(Dense(1, activation=linear, use_bias=True))
        # NOTE: `lr=` matches the (older) Keras version used throughout this file;
        # newer Keras spells it `learning_rate=`.
        self.model.compile(loss=mse, optimizer=Adam(lr=learning_rate))

    def predict(self, state: np.ndarray) -> np.ndarray:
        """Return the value estimate for a single (unbatched) state."""
        return self.model.predict(np.array((state,)))[0]

    def train(self, states: np.ndarray, targets: np.ndarray):
        """Run one gradient step on a batch of states and regression targets."""
        self.model.train_on_batch(states, targets)

    def save_model(self, filename: str):
        """Save the critic to `filename`.

        BUG FIX: previously the `filename` parameter was ignored and the model was
        always written to a hard-coded "..._critic.h5" path, so two critics would
        silently overwrite each other. Now symmetric with `load_model`.
        """
        self.model.save(filename)

    def load_model(self, filename: str):
        """Replace the current model with one loaded from `filename`."""
        self.model = load_model(filename)
def build_model():
    """Build a small CNN on 306x408x3 inputs, train it for 1000 batches on data
    from GET_DATA, save the weights to ./model1.h5, and plot the loss curve.
    """
    model = Sequential()
    # First conv stage declares the input shape explicitly.
    model.add(Conv2D(64, (5, 5), (1, 1), "SAME", activation="relu", input_shape=(306, 408, 3)))
    model.add(MaxPool2D((3, 3), (2, 2), 'same'))
    # Three more identically-shaped conv+pool stages (strides default to (1, 1)).
    for n_filters in (64, 64, 16):
        model.add(Conv2D(n_filters, (5, 5), padding="SAME", activation='relu'))
        model.add(MaxPool2D((3, 3), (2, 2), 'same'))
    # Regularized fully-connected head; 8 output units.
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.compile(Adadelta(), loss=mean_squared_error)
    print(model.summary())

    # Single fixed batch, trained on repeatedly.
    train_X, train_y = GET_DATA.get_batches_data()
    cost_values = []
    for step in range(1000):
        cost = model.train_on_batch(train_X, train_y)
        cost_values.append(cost)
        if step % 10 == 0:
            print("step %d , cost value is %.3f" % (step, cost))
    model.save("./model1.h5")

    plt.plot(cost_values)
    plt.show()
class Reinforce_with_mean_baselineValueBrain:
    """Baseline (state-value) network for REINFORCE with a learned mean baseline."""

    def __init__(self, learning_rate: float = 0.0001, hidden_layers_count: int = 0,
                 neurons_per_hidden_layer: int = 0):
        self.model = Sequential()
        for _ in range(hidden_layers_count):
            self.model.add(Dense(neurons_per_hidden_layer, activation=tanh))
        # BUG FIX: the output layer used `softmax` on a single unit, which always
        # outputs exactly 1.0 regardless of the weights — the baseline could never
        # be learned. A value head must be unbounded: use `linear`, consistent
        # with PPOValueBrain in this file.
        self.model.add(Dense(1, activation=linear, use_bias=True))
        self.model.compile(loss=mse, optimizer=Adam(lr=learning_rate))

    def predict(self, state: np.ndarray) -> np.ndarray:
        """Return the baseline value for a single (unbatched) state."""
        return self.model.predict(np.array((state, )))[0]

    def train(self, states: np.ndarray, targets: np.ndarray):
        """Run one gradient step on a batch of states and return targets."""
        self.model.train_on_batch(states, targets)
class DQNBrain:
    """Q-network: maps a state to one Q-value per action, optionally with a conv front end."""

    def __init__(
        self,
        output_dim: int,
        learning_rate: float = 0.0001,
        hidden_layers_count: int = 0,
        neurons_per_hidden_layer: int = 0,
        activation: str = "tanh",
        using_convolution: bool = False,
    ):
        self.model = Sequential()
        if using_convolution:
            # Fixed two-layer conv stack feeding one dense hidden layer.
            self.model.add(Conv2D(64, kernel_size=3, activation=activation))
            self.model.add(Conv2D(32, kernel_size=3, activation=activation))
            self.model.add(Flatten())
            self.model.add(Dense(neurons_per_hidden_layer, activation=activation))
        else:
            for _ in range(hidden_layers_count):
                self.model.add(Dense(neurons_per_hidden_layer, activation=activation))
        # Linear output: Q-values are unbounded.
        self.model.add(Dense(output_dim, activation=linear, use_bias=False))
        self.model.compile(loss=mse, optimizer=Adam(lr=learning_rate))

    def predict(self, state: np.ndarray) -> np.ndarray:
        """Return the Q-values for a single (unbatched) state."""
        return self.model.predict(np.array((state,)))[0]

    def train(self, state: np.ndarray, chosen_action_mask: np.ndarray, target: float):
        """One TD update: move only the chosen action's Q-value toward `target`."""
        target_vec = chosen_action_mask * target + (
            1 - chosen_action_mask
        ) * self.predict(state)
        self.model.train_on_batch(np.array((state,)), np.array((target_vec,)))

    def save_model(self, filename: str):
        """Save the network to `filename`.

        BUG FIX: previously the `filename` parameter was ignored and the model was
        always written to a hard-coded ".h5" path. Now symmetric with `load_model`.
        """
        self.model.save(filename)

    def load_model(self, filename: str):
        """Replace the current model with one loaded from `filename`."""
        self.model = load_model(filename)
def test_forward_works_with_mask(numpy_crf):
    """CRF loss with a non-trivial mask must match the reference numpy implementation."""
    # Emission scores: batch of 2 sequences, 3 timesteps, 5 tags.
    logits = np.array([
        [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
        [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
    ])
    # Fixed 5x5 tag-to-tag transition energies.
    transitions = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0]
    ])
    # Boundary (start/end) energies per tag.
    boundary_transitions = np.array([0.1, 0.2, 0.3, 0.4, 0.6])
    # Gold tag sequences for the two batch items.
    tags = np.array([
        [2, 3, 4],
        [3, 2, 2]
    ])
    # Use the CRF Module with fixed transitions to compute the log_likelihood
    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        boundary_initializer=initializers.Constant(boundary_transitions),
        name="crf_layer"
    )
    # Use a non-trivial mask: the second sequence's last timestep is padding.
    mask = np.array([
        [1, 1, 1],
        [1, 1, 0]
    ])
    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    # MockMasking injects the fixed mask into the layer chain.
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})
    result = model.train_on_batch(logits, tags)

    # NOTE(review): `boundary_transitions` is passed twice here — presumably the
    # reference implementation takes separate left/right boundary energies and
    # both are intentionally the same vector; confirm against numpy_crf's signature.
    numpy_crf_instance = numpy_crf(logits, mask, transitions, boundary_transitions, boundary_transitions)
    # Keras reports the mean NLL over the batch of 2, hence the division by -2.
    # ("compute_log_likehood" is the reference object's actual (typo'd) method name.)
    expected = numpy_crf_instance.compute_log_likehood(tags) / -2
    assert result == approx(expected)
class DQNBrain:
    """Q-network: a dense MLP mapping a state to one Q-value per action."""

    def __init__(self, output_dim: int, learning_rate: float = 0.0001,
                 hidden_layers_count: int = 0, neurons_per_hidden_layer: int = 0):
        # tanh hidden stack, then a bias-free linear head (Q-values are unbounded).
        self.model = Sequential()
        for _ in range(hidden_layers_count):
            self.model.add(Dense(neurons_per_hidden_layer, activation=tanh))
        self.model.add(Dense(output_dim, activation=linear, use_bias=False))
        self.model.compile(loss=mse, optimizer=Adam(lr=learning_rate))

    def predict(self, state: np.ndarray) -> np.ndarray:
        """Return the Q-values for a single (unbatched) state."""
        batch_of_one = np.array((state, ))
        return self.model.predict(batch_of_one)[0]

    def train(self, state: np.ndarray, chosen_action_mask: np.ndarray, target: float):
        """One TD update: only the chosen action's Q-value moves toward `target`;
        all other actions keep their current predictions as targets."""
        current_q = self.predict(state)
        target_vec = chosen_action_mask * target + (1 - chosen_action_mask) * current_q
        self.model.train_on_batch(np.array((state, )), np.array((target_vec, )))
def regression(x_data, y_data):
    """Fit a 1-10-1 tanh MLP to (x_data, y_data) with SGD, print training
    progress and the first layer's weights, then plot the fit over the data.
    """
    # Build the sequential network: 1 input neuron, 10 hidden, 1 output.
    # Style 1: add activations as separate layers.
    model = Sequential()
    model.add(Dense(units=10, input_dim=1))
    model.add(Activation('tanh'))
    model.add(Dense(units=1))
    model.add(Activation('tanh'))
    # Style 2 (equivalent alternative): pass activation= directly to Dense, e.g.
    # model.add(Dense(units=10, input_dim=1, activation='relu'))
    # model.add(Dense(units=1, activation='relu'))

    # Stochastic gradient descent, lr=0.3; mse = mean squared error.
    optimizer = SGD(lr=0.3)
    model.compile(optimizer=optimizer, loss='mse')

    # Train one batch at a time; report the loss every 500 steps.
    for step in range(3001):
        cost = model.train_on_batch(x_data, y_data)
        if step % 500 == 0:
            print('cost: ', cost)

    # Inspect the first layer's weights and bias.
    W, b = model.layers[0].get_weights()
    print('W:', W, ' b: ', b)
    print(len(model.layers))

    # Predict over the training inputs and overlay the fit on the scatter plot.
    y_pred = model.predict(x_data)
    plt.scatter(x_data, y_data)
    plt.plot(x_data, y_pred, 'r-', lw=3)
    plt.show()
'''
How to adjust the learning rate.
The default is lr=0.01. First import SGD: from keras.optimizers import SGD
Then define an sgd instance: sgd = SGD(lr=0.1)
'''
model = Sequential()
# Define the optimizer with an explicit learning rate.
sgd = SGD(lr=0.1)
# Build a 1-10-1 network (1 input neuron, 10 hidden, 1 output).
model.add(Dense(units=10, input_dim=1, name='fc_1'))
model.add(Activation('tanh'))
model.add(Dense(units=1, input_dim=10, name='fc_2'))
model.add(Activation('tanh'))
# Compile the model and print its structure.
# NOTE: x_data / y_data are assumed to be defined earlier at module level.
model.compile(optimizer=sgd, loss='mse')
model.summary()
# Train one batch at a time; print the loss every 500 steps.
for step in range(10001):
    cost = model.train_on_batch(x_data, y_data)
    if step % 500 == 0:
        print("cost", cost)
# Predict over the training inputs and overlay the fit on the data scatter plot.
y_pred = model.predict(x_data)
plt.scatter(x_data, y_data)
plt.plot(x_data, y_pred, 'r-', lw=3)
plt.show()