import copy

import matplotlib.pyplot as plt
import numpy as np
import torch


def plot_activations():
    '''Plot all the implemented activation functions and their derivatives.'''
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    x_range = np.arange(-3, 3, 0.01)
    x = torch.Tensor(x_range)

    tanh = Tanh()
    plt.plot(x_range, tanh.forward(x).numpy(),
             color='b', label='Tanh', alpha=0.5)
    plt.plot(x_range, tanh.backward(1).numpy(),
             color='b', label='Tanh derivative', alpha=0.5, linestyle=':')

    relu = ReLU()
    plt.plot(x_range, relu.forward(x).numpy(),
             color='g', label='ReLU (0)', alpha=0.5)
    plt.plot(x_range, relu.backward(1).numpy(),
             color='g', label='ReLU derivative', alpha=0.5, linestyle=':')

    leakyrelu = LeakyReLU()
    plt.plot(x_range, leakyrelu.forward(x).numpy(),
             color='m', label='LeakyReLU (0.01)', alpha=0.5)
    plt.plot(x_range, leakyrelu.backward(1).numpy(),
             color='m', label='LeakyReLU derivative', alpha=0.5, linestyle=':')

    prelu = PReLU(init=0.1)
    plt.plot(x_range, prelu.forward(x).numpy(),
             color='y', label='PReLU', alpha=0.5)
    plt.plot(x_range, prelu.backward(1).numpy(),
             color='y', label='PReLU derivative (0.1 - trainable)', alpha=0.5, linestyle=':')

    plt.legend(framealpha=1)
    plt.tight_layout()
    plt.savefig('figures/activations.png', dpi=300)
    plt.show()
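# A minimal sketch (an assumption, not the repository's actual Tanh class) of
# the forward/backward contract plot_activations() relies on: forward() caches
# the activation and backward(e) returns e * f'(x) elementwise, so backward(1)
# yields the raw derivative curve that is plotted above.
class TanhSketch:
    def forward(self, x):
        self.out = torch.tanh(x)        # cache tanh(x) for the backward pass
        return self.out

    def backward(self, e):
        return e * (1 - self.out ** 2)  # d/dx tanh(x) = 1 - tanh(x)^2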
class LSTM(Layer):
    def __init__(self, units=0, input_shape=0, dtype=np.float32, gate_act=Sigmoid):
        super(LSTM, self).__init__(input_shape, units)
        self.dtype = dtype
        # Input (I), forget (F) and output (O) gates use gate_act (sigmoid by
        # default); the candidate update (U) always uses tanh.
        self.gate_acts = {
            "I": gate_act(),
            "F": gate_act(),
            "O": gate_act(),
            "U": Tanh()
        }
        self.act_tanh = Tanh()
        self.Wx = {"I": None, "F": None, "U": None, "O": None}
        self.Wh = {"I": None, "F": None, "U": None, "O": None}
        self.B = {"I": None, "F": None, "U": None, "O": None}
        for k in ["I", "F", "U", "O"]:
            self.Wx[k] = np.random.uniform(-1, 1, (input_shape, units)).astype(dtype)
            self.Wh[k] = np.random.uniform(-1, 1, (units, units)).astype(dtype)
            self.B[k] = np.random.uniform(-1, 1, 1).astype(dtype)

    def configure(self, data_shape, phase, prevLayer=None):
        self.batch = data_shape[0]
        for k in self.gate_acts:
            self.gate_acts[k].configure(data_shape, phase, prevLayer)
        self.act_tanh.configure(data_shape, phase, prevLayer)
        # One optimizer state per weight matrix: 4 for Wx, 4 for Wh.
        self.optimizers = [copy.deepcopy(self.optimizer) for _ in range(8)]
        # State and gate buffers; C_1/H_1 hold the previous time step.
        self.buff = {
            k: np.zeros((self.batch, self.units), dtype=self.dtype)
            for k in ["C", "C_1", "H", "H_1", "I", "F", "U", "O"]
        }
        self.X = np.zeros((self.batch, self.input_shape), dtype=self.dtype)

    def forward(self, x):
        self.X[:] = x
        for k in ["I", "F", "O", "U"]:
            self.buff[k] = self.gate_acts[k].forward(
                self.X.dot(self.Wx[k]) + self.buff["H_1"].dot(self.Wh[k]) + self.B[k])
        # Keep the previous state for backward(); overwriting C_1/H_1 before
        # the backward pass would corrupt the gradients that depend on them.
        self.C_prev = self.buff["C_1"].copy()
        self.H_prev = self.buff["H_1"].copy()
        # c_t = f_t * c_{t-1} + u_t * i_t  (the forget gate, not the input
        # gate, multiplies the previous cell state).
        self.buff["C"] = (self.buff["F"] * self.buff["C_1"]
                          + self.buff["U"] * self.buff["I"])
        self.Ctanh = self.act_tanh.forward(self.buff["C"])
        # h_t = o_t * tanh(c_t)
        self.buff["H"] = self.Ctanh * self.buff["O"]
        self.buff["C_1"] = self.buff["C"]
        self.buff["H_1"] = self.buff["H"]
        return self.buff["H"]

    def backward(self, e):
        delta = {}
        delta["C"] = self.act_tanh.backward(e) * self.buff["O"]
        delta["C_1"] = delta["C"] * self.buff["F"]
        delta["O"] = self.gate_acts["O"].backward(e) * self.Ctanh
        delta["I"] = self.gate_acts["I"].backward(delta["C"]) * self.buff["U"]
        delta["U"] = self.gate_acts["U"].backward(delta["C"]) * self.buff["I"]
        delta["F"] = self.gate_acts["F"].backward(delta["C"]) * self.C_prev
        # Gradient flowing to the previous hidden state through all four gates.
        delta["H"] = (delta["I"].dot(self.Wh["I"].T)
                      + delta["O"].dot(self.Wh["O"].T)
                      + delta["U"].dot(self.Wh["U"].T)
                      + delta["F"].dot(self.Wh["F"].T))
        # Weight updates: the einsum builds per-sample outer products that are
        # summed over the batch (equivalent to X.T @ delta).
        for i, k in enumerate(["I", "F", "U", "O"]):
            self.Wx[k] -= self.optimizers[i](
                np.sum(np.einsum("bi,bj->bij", self.X,
                                 self.learning_rate * delta[k]),
                       axis=0)) / self.batch
            self.Wh[k] -= self.optimizers[4 + i](
                np.sum(np.einsum("bi,bj->bij", self.H_prev,
                                 self.learning_rate * delta[k]),
                       axis=0)) / self.batch
            self.B[k] -= np.sum(self.learning_rate * delta[k])
        return delta["H"]
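# A self-contained NumPy sketch of a single LSTM forward step, useful as a
# reference for the gate equations in the class above. The names here
# (lstm_step and its arguments) are illustrative, not part of the repository;
# the class additionally depends on Layer, Sigmoid and the optimizer setup.
def lstm_step(x, h_prev, c_prev, Wx, Wh, B):
    '''One step: x is (batch, input), h_prev and c_prev are (batch, units).'''
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    pre = {k: x.dot(Wx[k]) + h_prev.dot(Wh[k]) + B[k] for k in "IFUO"}
    i = sigmoid(pre["I"])      # input gate
    f = sigmoid(pre["F"])      # forget gate
    o = sigmoid(pre["O"])      # output gate
    u = np.tanh(pre["U"])      # candidate update
    c = f * c_prev + u * i     # new cell state
    h = o * np.tanh(c)         # new hidden state
    return h, c


# Example: unrolling the sketch over a toy sequence of length 3 with
# batch=2, input_shape=4, units=5 (all values here are arbitrary).
rng = np.random.default_rng(0)
Wx = {k: rng.uniform(-1, 1, (4, 5)) for k in "IFUO"}
Wh = {k: rng.uniform(-1, 1, (5, 5)) for k in "IFUO"}
B = {k: rng.uniform(-1, 1, 1) for k in "IFUO"}
h = c = np.zeros((2, 5))
for t in range(3):
    h, c = lstm_step(rng.normal(size=(2, 4)), h, c, Wx, Wh, B)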