Example #1
import matplotlib.pyplot as plt
import numpy as np
import torch

# Tanh, ReLU, LeakyReLU and PReLU are the project's own activation classes.


def plot_activations():
    '''Plot each implemented activation function alongside its derivative.'''
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    x_range = np.arange(-3, 3, 0.01)
    x = torch.Tensor(x_range)
    tanh = Tanh()
    plt.plot(x_range,
             tanh.forward(x).numpy(),
             color='b',
             label='Tanh',
             alpha=0.5)
    # backward(1) propagates a unit upstream gradient, tracing the raw derivative
    plt.plot(x_range,
             tanh.backward(1).numpy(),
             color='b',
             label='Tanh derivative',
             alpha=0.5,
             linestyle=':')
    relu = ReLU()
    plt.plot(x_range,
             relu.forward(x).numpy(),
             color='g',
             label='ReLU (0)',
             alpha=0.5)
    plt.plot(x_range,
             relu.backward(1).numpy(),
             color='g',
             label='ReLU derivative',
             alpha=0.5,
             linestyle=':')
    leakyrelu = LeakyReLU()
    plt.plot(x_range,
             leakyrelu.forward(x).numpy(),
             color='m',
             label='LeakyReLU (0.01)',
             alpha=0.5)
    plt.plot(x_range,
             leakyrelu.backward(1).numpy(),
             color='m',
             label='LeakyReLU derivative',
             alpha=0.5,
             linestyle=':')
    prelu = PReLU(init=0.1)
    plt.plot(x_range,
             prelu.forward(x).numpy(),
             color='y',
             label='PReLU',
             alpha=0.5)
    plt.plot(x_range,
             prelu.backward(1).numpy(),
             color='y',
             label='PReLU derivative (0.1 - trainable)',
             alpha=0.5,
             linestyle=':')
    plt.legend(framealpha=1)
    plt.tight_layout()
    plt.savefig('figures/activations.png', dpi=300)
    plt.show()
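
The plot assumes each activation object exposes a forward(x) plus a backward(upstream) that returns the upstream gradient times the local derivative, so backward(1) yields the raw derivative curve. Below is a minimal sketch of a compatible Tanh class, assuming that output-caching convention; the project's real classes may differ.

import torch


class Tanh:
    def forward(self, x):
        # cache the output; the derivative can be expressed through it
        self.out = torch.tanh(x)
        return self.out

    def backward(self, grad):
        # d tanh(x)/dx = 1 - tanh(x)^2, so backward(1) is the derivative itself
        return grad * (1 - self.out ** 2)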
Example #2
import copy

import numpy as np

# Layer, Sigmoid and Tanh come from the surrounding framework.


class LSTM(Layer):
    def __init__(self,
                 units=0,
                 input_shape=0,
                 dtype=np.float32,
                 gate_act=Sigmoid):
        super(LSTM, self).__init__(input_shape, units)
        self.dtype = dtype  # configure() builds the input buffer with this dtype
        # input (I), forget (F) and output (O) gates use gate_act;
        # the candidate update (U) is squashed with tanh
        self.gate_acts = {
            "I": gate_act(),
            "F": gate_act(),
            "O": gate_act(),
            "U": Tanh()
        }
        self.act_tanh = Tanh()
        # per-gate input-to-hidden (Wx) and hidden-to-hidden (Wh) weights, biases
        self.Wx, self.Wh, self.B = {}, {}, {}
        for k in ["I", "F", "U", "O"]:
            self.Wx[k] = np.random.uniform(-1, 1,
                                           (input_shape, units)).astype(dtype)
            self.Wh[k] = np.random.uniform(-1, 1, (units, units)).astype(dtype)
            self.B[k] = np.random.uniform(-1, 1, 1).astype(dtype)

    def configure(self, data_shape, phase, prevLayer=None):
        self.batch = data_shape[0]
        for k in self.gate_acts:
            self.gate_acts[k].configure(data_shape, phase, prevLayer)
        self.act_tanh.configure(data_shape, phase, prevLayer)
        # one optimizer instance per trainable matrix: 4 gates x (Wx, Wh)
        self.optimizers = [copy.deepcopy(self.optimizer) for _ in range(8)]
        # recurrent buffers: cell state C, hidden state H, their previous
        # values C_1 / H_1, and the four gate activations
        self.buff = {
            k: np.zeros((self.batch, self.units))
            for k in ["C", "C_1", "H", "H_1", "I", "F", "U", "O"]
        }
        self.X = np.zeros((self.batch, self.input_shape), dtype=self.dtype)

    def forward(self, x):
        self.X[:] = x

        # gate activations: act(x @ Wx + h_{t-1} @ Wh + b) for each gate
        for k in ["I", "F", "O", "U"]:
            self.buff[k] = self.gate_acts[k].forward(
                self.X.dot(self.Wx[k]) + self.buff["H_1"].dot(self.Wh[k]) +
                self.B[k])
        # cell state: C_t = F * C_{t-1} + I * U
        self.buff["C"] = (self.buff["F"] * self.buff["C_1"] +
                          self.buff["I"] * self.buff["U"])
        self.Ctanh = self.act_tanh.forward(self.buff["C"])
        # hidden state: H_t = O * tanh(C_t)
        self.buff["H"] = self.Ctanh * self.buff["O"]
        self.buff["C_1"] = self.buff["C"]
        self.buff["H_1"] = self.buff["H"]
        return self.buff["H"]

    def backward(self, e):
        delta = {}
        # error reaching the cell state through H = O * tanh(C)
        delta["C"] = self.act_tanh.backward(e) * self.buff["O"]
        delta["C_1"] = delta["C"] * self.buff["F"]
        # per-gate errors, each scaled by the factor it multiplies
        delta["O"] = self.gate_acts["O"].backward(e) * self.Ctanh
        delta["I"] = self.gate_acts["I"].backward(delta["C"]) * self.buff["U"]
        delta["U"] = self.gate_acts["U"].backward(delta["C"]) * self.buff["I"]
        delta["F"] = self.gate_acts["F"].backward(
            delta["C"]) * self.buff["C_1"]
        # error propagated back to the previous hidden state
        delta["H"] = (delta["I"].dot(self.Wh["I"].T) +
                      delta["O"].dot(self.Wh["O"].T) +
                      delta["U"].dot(self.Wh["U"].T) +
                      delta["F"].dot(self.Wh["F"].T))

        # update: sum the per-sample outer-product gradients, let each
        # matrix's optimizer transform its gradient, then average and step
        for i, k in enumerate(["I", "F", "U", "O"]):
            grad_wx = np.einsum("bi,bj->ij", self.X,
                                self.learning_rate * delta[k])
            self.Wx[k] -= self.optimizers[i](grad_wx) / self.batch
            grad_wh = np.einsum("bi,bj->ij", self.buff["H_1"],
                                self.learning_rate * delta[k])
            self.Wh[k] -= self.optimizers[4 + i](grad_wh) / self.batch
            self.B[k] -= np.sum(self.learning_rate * delta[k])

        return delta["H"]
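
A hedged usage sketch for the layer. The surrounding framework normally injects optimizer and learning_rate before configure runs; here stand-ins are assigned by hand, and the identity optimizer, the (4, 8) batch shape and the "train" phase string are illustrative assumptions, not the project's defaults.

import numpy as np

lstm = LSTM(units=16, input_shape=8)
lstm.optimizer = lambda grad: grad        # stand-in: plain gradient step
lstm.learning_rate = 0.01
lstm.configure(data_shape=(4, 8), phase="train")

x = np.random.uniform(-1, 1, (4, 8)).astype(np.float32)
h = lstm.forward(x)                       # hidden state H_t, shape (4, 16)
dh_prev = lstm.backward(np.ones_like(h))  # returns the error for H_{t-1}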