Example #1
0
    def _init_params(self):
        """Create the gradient cache and every sub-layer used by this module.

        The sub-layers are: a causal dilated `Conv1D`, the `Tanh` / `Sigmoid`
        gate activations together with the `Multiply` layer that combines
        them, a width-1 `Conv1D`, and two `Add` layers (`add_residual` and
        `add_skip`). Every wrapped layer is given an identity activation,
        `Affine(slope=1, intercept=0)`.
        """
        self._dv = {}

        # causal, dilated convolution
        dilated_conv_kwargs = dict(
            kernel_width=2,
            stride=1,
            pad="causal",
            dilation=self.dilation,
            out_ch=self.ch_dilation,
            init=self.init,
            optimizer=self.optimizer,
            act_fn=Affine(slope=1, intercept=0),
        )
        self.conv_dilation = Conv1D(**dilated_conv_kwargs)

        # gate: two activations and the layer that multiplies their outputs
        self.tanh, self.sigm = Tanh(), Sigmoid()
        self.multiply_gate = Multiply(act_fn=Affine(slope=1, intercept=0))

        # width-1 convolution
        pointwise_conv_kwargs = dict(
            kernel_width=1,
            stride=1,
            pad="same",
            dilation=0,
            out_ch=self.ch_residual,
            init=self.init,
            optimizer=self.optimizer,
            act_fn=Affine(slope=1, intercept=0),
        )
        self.conv_1x1 = Conv1D(**pointwise_conv_kwargs)

        # each addition layer gets its own identity activation instance
        self.add_residual = Add(act_fn=Affine(slope=1, intercept=0))
        self.add_skip = Add(act_fn=Affine(slope=1, intercept=0))
Example #2
0
def test_sigmoid_activation(N=None):
    """Check `Sigmoid.fn` against the reference `expit` on random inputs.

    Runs `N` trials, or loops forever when `N` is None. Every trial draws a
    random tensor of shape (1, n_dims) with 1 <= n_dims < 100, asserts that
    both implementations agree, and prints "PASSED".
    """
    from activations import Sigmoid

    n_trials = N if N is not None else np.inf
    ours, reference = Sigmoid(), expit

    trial = 0
    while trial < n_trials:
        width = np.random.randint(1, 100)
        sample = random_tensor((1, width))
        assert_almost_equal(ours.fn(sample), reference(sample))
        print("PASSED")
        trial += 1
Example #3
0
    def set_params(self, summary_dict):
        """
        Restore parameters and hyperparameters from a summary dictionary.

        Parameters
        ----------
        summary_dict : dict
            Must contain the keys "parameters" and "hyperparameters", each
            mapping to a dict. A "components" entry inside either of them maps
            component names to the dicts forwarded to the `set_params` method
            of the matching attribute on `self` (only for names listed in
            `self.hyperparameters["component_ids"]`).

        Notes
        -----
        Keys that are not already present in `self.parameters` /
        `self.hyperparameters` are ignored. An "act_fn" hyperparameter given
        as one of the strings "ReLU", "Sigmoid", "Tanh",
        "Affine(slope=..., intercept=...)" or "Leaky ReLU(alpha=...)" is
        converted back into the corresponding activation object; any other
        value is stored unchanged.
        """
        cids = self.hyperparameters["component_ids"]

        for k, v in summary_dict["parameters"].items():
            if k == "components":
                for c, cd in v.items():
                    if c in cids:
                        getattr(self, c).set_params(cd)

            elif k in self.parameters:
                self.parameters[k] = v

        for k, v in summary_dict["hyperparameters"].items():
            if k == "components":
                for c, cd in v.items():
                    if c in cids:
                        getattr(self, c).set_params(cd)

            if k not in self.hyperparameters:
                continue

            value = v
            # Only the "act_fn" entry is decoded, and only when it is a string.
            # The branches must test the key `k`; comparing the value `v` to
            # "act_fn" can never succeed together with the second condition.
            if k == "act_fn" and isinstance(v, str):
                if v == "ReLU":
                    value = ReLU()
                elif v == "Sigmoid":
                    value = Sigmoid()
                elif v == "Tanh":
                    value = Tanh()
                elif "Affine" in v:
                    m = re.match(r"Affine\(slope=(.*), intercept=(.*)\)", v)
                    # an unparseable spec is kept verbatim instead of crashing
                    if m is not None:
                        slope, intercept = m.groups()
                        value = Affine(float(slope), float(intercept))
                elif "Leaky ReLU" in v:
                    m = re.match(r"Leaky ReLU\(alpha=(.*)\)", v)
                    if m is not None:
                        value = LeakyReLU(float(m.groups()[0]))

            self.hyperparameters[k] = value
Example #4
0
def test_sigmoid_grad(N=None):
    """Check `Sigmoid.grad` against a torch-derived gradient of `torch.sigmoid`.

    Runs `N` trials, or loops forever when `N` is None. Every trial draws a
    random tensor of shape (n_ex, n_dims), both sizes in [1, 100), asserts
    that both gradients agree, and prints "PASSED".
    """
    from activations import Sigmoid

    n_trials = N if N is not None else np.inf
    ours = Sigmoid()
    reference = torch_gradient_generator(torch.sigmoid)

    trial = 0
    while trial < n_trials:
        # rows are drawn before columns, as in the reference ordering
        rows = np.random.randint(1, 100)
        cols = np.random.randint(1, 100)
        sample = random_tensor((rows, cols))
        assert_almost_equal(ours.grad(sample), reference(sample))
        print("PASSED")
        trial += 1
Example #5
0
    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        """
        Set up the three linear maps and the non-linearity of the network.

        :param n_inputs: dimension of inputs
        :param n_hidden: dimension of hidden layer
        :param n_output: dimension of output (token)
        :param activation: either 'sigmoid' or 'tanh'
        :raises Exception: if ``activation`` is any other value
        """
        super().__init__()
        self.n_inputs, self.n_hidden, self.n_output = n_inputs, n_hidden, n_output

        # reject unsupported non-linearities before building anything else
        if activation not in ('sigmoid', 'tanh'):
            raise Exception("Non-linearity not found")
        self.activation = Sigmoid() if activation == 'sigmoid' else Tanh()

        # input->hidden, hidden->hidden and hidden->output maps
        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)

        # register the parameters of every linear map, in creation order
        for linear in (self.w_ih, self.w_hh, self.w_ho):
            self.parameters += linear.get_parameters()
Example #6
0
def main():
    """Train, plot, test and save an MLP on the MNIST CSV files.

    Loads "mnist_train.csv" / "mnist_test.csv" through `Dataset`, builds an
    `MLP` with three hidden layers, runs it through `train_test`, and stores
    the trained weights in 'weights.npz'.
    """
    print('Loading data')
    loader = Dataset("mnist_train.csv", "mnist_test.csv", 0.10, 10)
    dataset = loader.create_dataset()
    print('Data loaded')

    # MNIST images are 28 x 28 pixels and fall into 10 classes (digits 0-9).
    image_size = 28
    image_pixels = image_size ** 2
    num_labels = 10

    # One entry per hidden layer; each number is that layer's neuron count.
    # Here: three hidden layers of 128, 128 and 64 neurons.
    hiddens = [128, 128, 64]

    # One activation per layer, i.e. len(hiddens) + 1 entries. Because the
    # criterion is softmax cross-entropy, the last activation is the identity.
    activations = [Sigmoid(), Tanh(), Sigmoid(), Identity()]

    # training settings
    lr = 0.1
    num_epochs = 100
    batch_size = 784

    # build the MLP model
    mlp = MLP(
        input_size=image_pixels,
        output_size=num_labels,
        hiddens=hiddens,
        activations=activations,
        criterion=SoftmaxCrossEntropy(),
        lr=lr,
    )

    # train, plot, test, then persist the trained weights
    runner = train_test(mlp, dataset, num_epochs, batch_size)
    runner.train_network()
    runner.plots()
    runner.test_network()
    runner.save('weights.npz')
Example #7
0
 def __init__(
         self,
         L=1,
         n=np.random.randint(1, 6, size=1),
         activation=Sigmoid()):
     """Store the network configuration.

     L: number of hidden layers.
     n: size of each hidden layer, n[0]=n_1, ..., n[L-1]=n_L.
     activation: activation function object.

     NOTE: the defaults for `n` and `activation` are evaluated once, when
     this function is defined, so every call that relies on them receives
     the very same objects.
     """
     self.L, self.n, self.activation = L, n, activation
 def __init__(self, n_trees=100, classification=True, learning_rate=0.01, max_depth=10):
     """Record the hyperparameters and reset all fitted state.

     The training data (`X`, `y`) start as None, the lists of weak learners
     and predictions start empty, and a `Sigmoid` instance is kept on
     `self.Sigmoid`.
     """
     # hyperparameters
     self.n_trees, self.classification = n_trees, classification
     self.learning_rate, self.max_depth = learning_rate, max_depth

     # state that is filled in later
     self.X = self.y = None
     self.weak_learners, self.preds = [], []

     self.Sigmoid = Sigmoid()
Example #9
0
 def __init__(
     self,
     inputvector=None,  #input vector, has to be array type#
     activation=Sigmoid(),  #activation function, will be applied termwise#
     weight=None,  #weights from input to output layer, should be an array of size outputsize x inputsize#
     bias=None,  #bias vectors in the particular layer, a vector array of length = outputsize#
 ):
     """Store the input vector, activation, weights and bias of one layer.

     `inputvector`, `weight` and `bias` default to a fresh empty list per
     instance. They previously defaulted to `[]` literals in the signature,
     which Python evaluates only once, so every instance built with the
     defaults shared (and could mutate) the same list objects.
     """
     self.inputvector = [] if inputvector is None else inputvector
     self.activation = activation
     self.weight = [] if weight is None else weight
     self.bias = [] if bias is None else bias
Example #10
0
 def __init__(
     self,
     out_channels: int,
     param_size: int,
     dropout: float = 1.0,
     weight_init: str = 'normal',
     activation: Operation = Sigmoid(),
     flatten: bool = False,
 ):
     """Initialise the base class with `out_channels` and store the settings.

     Every argument is kept on the instance under an attribute of the same
     name; nothing else is computed here.
     """
     super().__init__(out_channels)
     self.out_channels, self.param_size = out_channels, param_size
     self.activation, self.flatten = activation, flatten
     self.dropout, self.weight_init = dropout, weight_init
Example #11
0
    def __init__(self, in_size, h_size, o_size):
        """Build an Affine -> Sigmoid -> Affine stack with random weights.

        Weights and biases are drawn from `np.random.randn`. `self.params`
        collects the `params` of every layer, in layer order.
        """
        # Draw order (W1, b1, W2, b2) is fixed so seeded runs stay reproducible.
        hidden_w = np.random.randn(in_size, h_size)
        hidden_b = np.random.randn(h_size)
        output_w = np.random.randn(h_size, o_size)
        output_b = np.random.randn(o_size)

        self.layers = [
            Affine(hidden_w, hidden_b),
            Sigmoid(),
            Affine(output_w, output_b),
        ]

        # flatten the per-layer parameter lists into a single list
        self.params = [p for layer in self.layers for p in layer.params]
Example #12
0
    def _build_decoder(self):
        """
        Assemble the MLP decoder as an ordered mapping of layers.

        FC1 -> ReLU -> FC2 -> Sigmoid
        """
        # settings shared by both fully-connected layers
        shared = dict(init=self.init, optimizer=self.optimizer)

        fc1 = FullyConnected(n_out=self.latent_dim, act_fn=ReLU(), **shared)

        # NB. the output width of FC2 depends on the dimensionality of X, so
        # a `None` placeholder is used here; per the original note it is
        # updated within the `forward` method.
        fc2 = FullyConnected(n_out=None, act_fn=Sigmoid(), **shared)

        self.decoder = OrderedDict()
        self.decoder["FC1"] = fc1
        self.decoder["FC2"] = fc2
 def __init__(
         self,
         L=1,  #number of hidden layers#
         n=np.random.randint(
             1, 6, size=1
         ),  #network size for each hidden layer n[0]=n_1, ..., n[L-1]=n_L#
         activation=Sigmoid(),
         weight=None,
         bias=None,
         outputsequence=None,
         preoutputsequence=None):
     """Store the network configuration and its per-layer containers.

     `weight`, `bias`, `outputsequence` and `preoutputsequence` default to a
     fresh empty list per instance. They previously defaulted to `[]`
     literals in the signature, which Python evaluates only once, so every
     network built with the defaults shared (and could mutate) the same
     list objects.

     NOTE: the defaults for `n` and `activation` are still evaluated once,
     at definition time; they are left unchanged to keep the signature's
     behavior.
     """
     self.L = L
     self.n = n
     self.activation = activation
     self.weight = [] if weight is None else weight
     self.bias = [] if bias is None else bias
     self.outputsequence = [] if outputsequence is None else outputsequence
     self.preoutputsequence = [] if preoutputsequence is None else preoutputsequence
Example #14
0
    def __init__(self, inputDim = 1, outputDim = 1, activation = Sigmoid(), optimizer = Adam()):
        """Create a layer with Xavier-uniform weights and zero biases.

        The weight matrix has shape (outputDim, inputDim). The weights and
        the biases each receive their own shallow copy of `optimizer`.
        """
        self.inputDim, self.outputDim = inputDim, outputDim
        self.activation = activation

        # separate optimizer copies for weights and biases
        self.weightOptimizer, self.biasOptimizer = copy.copy(optimizer), copy.copy(optimizer)

        # Xavier uniform initializer: sample from [-bound, bound)
        bound = np.sqrt(6 / (inputDim + outputDim))
        self.weight = np.random.uniform(low=-bound, high=bound, size=(outputDim, inputDim))
        self.bias = np.zeros(outputDim)

        # when False the layer is frozen: the original note says `trainable`
        # decides whether weights and biases are trained in the backward pass
        self.trainable = True
Example #15
0
    def __init__(
        self,
        n_out,
        act_fn=None,
        gate_fn=None,
        merge_mode="concat",
        init="glorot_uniform",
        optimizer=None,
    ):
        """
        A single bidirectional long short-term memory (LSTM) layer.

        Parameters
        ----------
        n_out : int
            Dimension of a single hidden state / output on a given timestep.
        act_fn : `activations.Activation` instance (default: None)
            Activation used to compute A[t]. Falls back to `Tanh()` when
            `None` is given.
        gate_fn : `activations.Activation` instance (default: None)
            Gate function for the update, forget, and output gates. Falls
            back to `Sigmoid()` when `None` is given.
        merge_mode : str (default: "concat")
            How the outputs of the forward and backward LSTMs are combined.
            Valid values are {"sum", "multiply", "concat", "average"}.
        init : str (default: 'glorot_uniform')
            The weight initialization strategy. Valid entries are
            {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
        optimizer : str or `OptimizerBase` instance (default: None)
            The optimization strategy used for gradient updates within the
            `update` method. If `None`, the `SGD` optimizer with default
            parameters is used.
        """
        super().__init__()

        # the input width is not known yet, so `n_in` starts as None
        self.n_in, self.n_out = None, n_out
        self.init, self.optimizer = init, optimizer
        self.merge_mode = merge_mode

        # substitute the documented defaults for missing activations
        self.act_fn = act_fn if act_fn is not None else Tanh()
        self.gate_fn = gate_fn if gate_fn is not None else Sigmoid()

        self._init_params()
Example #16
0
 def init_from_str(self, act_str):
     """Build an activation object from its string description.

     The match is case-insensitive. Recognised forms are "relu", "tanh",
     "sigmoid", "affine(slope=<float>, intercept=<float>)" and
     "leaky relu(alpha=<float>)".

     Raises
     ------
     ValueError
         If `act_str` names no known activation, or if an "affine" /
         "leaky relu" description does not follow the expected pattern.
         The malformed case used to surface as an AttributeError on the
         failed regex match.
     """
     act_str = act_str.lower()
     if act_str == "relu":
         act_fn = ReLU()
     elif act_str == "tanh":
         act_fn = Tanh()
     elif act_str == "sigmoid":
         act_fn = Sigmoid()
     elif "affine" in act_str:
         match = re.match(r"affine\(slope=(.*), intercept=(.*)\)", act_str)
         if match is None:
             raise ValueError("Unknown activation: {}".format(act_str))
         slope, intercept = match.groups()
         act_fn = Affine(float(slope), float(intercept))
     elif "leaky relu" in act_str:
         match = re.match(r"leaky relu\(alpha=(.*)\)", act_str)
         if match is None:
             raise ValueError("Unknown activation: {}".format(act_str))
         act_fn = LeakyReLU(float(match.groups()[0]))
     else:
         raise ValueError("Unknown activation: {}".format(act_str))
     return act_fn
 def __init__(self,
              n_iters=10000,
              hidden_activation=Sigmoid(),
              output_activation=Linear(),
              learning_rate=1e-2,
              n_hidden=10,
              loss=MSE(),
              mini_batch=10):
     """Store the training settings and reset the model state.

     The weight attributes (`W`, `W0`, `V`, `V0`) and the training data
     (`X`, `y`) all start as None.
     """
     # training settings
     self.n_iters, self.learning_rate = n_iters, learning_rate
     self.n_hidden, self.mini_batch = n_hidden, mini_batch
     self.loss = loss

     # activations
     self.hidden_activation = hidden_activation
     self.output_activation = output_activation

     # weights and data are not set yet
     self.W = self.W0 = None
     self.V = self.V0 = None
     self.X = self.y = None
Example #18
0
def main():
    """Train a small dense network on 8-bit parity data and report results.

    Prints a per-sample table (data, answer, raw prediction, rounded result,
    correctness), then the final loss and the number of mismatches, and
    finally shows a plot of the loss per epoch.
    """
    # Load the data first. The input dimension is the length of one sample;
    # because the data set is small, the batch size is the whole data set.
    x, y = ParityBits(8).load_data()
    batch_size, input_dim = x.shape

    layers = [
        Dense(64, activation=ReLU()),
        Dense(32, activation=Tanh()),
        Dense(16, activation=Tanh()),
        Dense(4, activation=None),
        Dense(1, activation=Sigmoid()),
    ]
    # Gradient descent is the optimizer and MSE is the loss function.
    gd = GradientDescent(learning_rate=0.01, momentum=0.0)
    mse = MeanSquaredError()
    model = Sequential(layers, input_dim=input_dim, optimizer=gd, loss=mse)

    # Train for the configured number of epochs; training returns the
    # predictions and the loss of every epoch.
    y_pred, losses = model.train(
        x, y, batch_size=batch_size, epochs=200, verbose_step=10)

    # The answers are all integers 0 or 1, so the final result is the
    # model's prediction rounded to an integer.
    result = np.around(y_pred).astype(int)
    # Difference between the answers and the rounded results.
    diff = np.subtract(y, result)

    # A DataFrame cell cannot hold an array, so every input row such as
    # `[0 0 0 0 0 0 0 0]` is converted to a string before printing.
    data_column = [np.array_str(row) for row in x]
    prediction_column = [f'{v:.8f}' for v in y_pred[:, 0]]
    # A difference of 0 means the prediction was correct.
    correct_column = [bool(v == 0) for v in diff[:, 0]]

    table = pd.DataFrame({
        "Data": data_column,
        "Answer": y[:, 0],
        "Prediction": prediction_column,
        "Result": result[:, 0],
        "Correct": correct_column,
    }, index=np.arange(1, len(x) + 1))
    print(table.to_string())

    # Report the final loss and how many results differ from the answers,
    # then plot the loss for every epoch.
    print(f'loss: {losses[-1]:.8f}, difference: {np.count_nonzero(diff)}')
    plt.figure(figsize=(8, 4))
    plt.plot(losses)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
Example #19
0
def plot_activations():
    """Plot six activations with their first and second derivatives.

    Draws a 2 x 3 grid, one panel per activation (Affine, Tanh, Sigmoid,
    ReLU, LeakyReLU, ELU), over the interval [-3, 3], and saves the figure
    to "plot.png" at 300 dpi.
    """
    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True)
    activations = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU()]

    for panel, act in zip(axes.flatten(), activations):
        # a fresh input column of shape (100, 1) for every activation
        xs = np.linspace(-3, 3, 100).astype(float).reshape(100, 1)

        curves = (
            (act(xs), r"$y$"),
            (act.grad(xs), r"$\frac{dy}{dx}$"),
            (act.grad2(xs), r"$\frac{d^2 y}{dx^2}$"),
        )
        for values, label in curves:
            panel.plot(xs, values, label=label, alpha=0.7)

        # dashed guide lines through the origin
        panel.hlines(0, -3, 3, lw=1, linestyles="dashed", color="k")
        panel.vlines(0, -1.2, 1.2, lw=1, linestyles="dashed", color="k")

        panel.set_ylim(-1.1, 1.1)
        panel.set_xlim(-3, 3)
        panel.set_xticks([])
        panel.set_yticks([-1, 0, 1])
        panel.xaxis.set_visible(False)
        panel.set_title("{}".format(act))
        panel.legend(frameon=False)
        sns.despine(left=True, bottom=True)

    fig.set_size_inches(8, 5)
    plt.tight_layout()
    plt.savefig("plot.png", dpi=300)
    plt.close("all")
Example #20
0
def cross_val_results(verbose=True):
    """
    Function for generating the accuracy results of four models presented in the report with their best parameters,
    averaged over 10 runs and using different combinations of the available optimizers and loss

    Results are appended in the order optimizer ("SGD", "Adam") -> loss ("MSE", "CrossEntropy") ->
    model ("ReLU", "Leaky", "Tanh", "Sigmoid").

    :param verbose: whether to print average results for each (Model, Optimizer, Loss) combination,
                    boolean, optional, default is True

    :returns: list of tuples containing (mean, std) of each (Model, Optimizer, Loss) combination, each tuple in [0, 1]^2
    """

    # ten independent (train, test) pairs, reused for every combination
    datasets = [(generate_disc_set(1000), generate_disc_set(1000)) for _ in range(10)]

    relu_model = Sequential(Linear(2, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 2), xavier_init=True)

    leaky_relu_model = Sequential(Linear(2, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 2), xavier_init=True)

    tanh_model = Sequential(Linear(2, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 2), xavier_init=True)

    # the sigmoid model is the only one built without `xavier_init=True`
    sigmoid_model = Sequential(Linear(2, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 2))

    models = [relu_model, leaky_relu_model, tanh_model, sigmoid_model]

    final_scores = []

    optimizers_names = ["SGD", "Adam"]
    models_names = ["ReLU", "Leaky", "Tanh", "Sigmoid"]

    losses_names = ["MSE", "CrossEntropy"]
    losses = [LossMSE(), LossCrossEntropy()]

    adam_params = {"ReLU": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08},
                   "Leaky": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08},
                   "Tanh": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08},
                   "Sigmoid": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08}}

    sgd_params = {"ReLU": {"lr": 0.001},
                  "Leaky": {"lr": 0.001},
                  "Tanh": {"lr": 0.001},
                  "Sigmoid": {"lr": 0.01}}

    for optim_name in optimizers_names:
        for loss_name, loss in zip(losses_names, losses):
            for model_name, model in zip(models_names, models):
                if verbose:
                    print("Validating model {} with {} and {} loss...".format(model_name, optim_name, loss_name),
                          end='')
                scores = []

                if optim_name == "Adam":
                    params = adam_params[model_name]
                    optim = Adam(model, criterion=loss, nb_epochs=50, mini_batch_size=10, lr=params["lr"],
                                 b1=params["b1"], b2=params["b2"], epsilon=params["epsilon"])
                else:
                    params = sgd_params[model_name]
                    # Build the optimizer around the model under evaluation, as the
                    # Adam branch does (this used to be hard-wired to `relu_model`).
                    optim = SGD(model, criterion=loss, nb_epochs=50, mini_batch_size=10, lr=params["lr"])

                for ((train_input, train_target), (test_input, test_target)) in datasets:
                    # every run starts from a fresh copy of the untrained model
                    optim.model = copy.deepcopy(model)

                    optim.train(train_input, train_target, verbose=False)

                    evaluator = Evaluator(optim.model)
                    accuracy = evaluator.compute_accuracy(test_input, test_target)

                    scores.append(accuracy)
                scores = torch.FloatTensor(scores)
                scores_mean = torch.mean(scores).item()
                scores_std = torch.std(scores).item()

                if verbose:
                    print("Score : {0:.3f} (+/- {1:.3f}) ".format(scores_mean, scores_std))

                final_scores.append((scores_mean, scores_std))

    return final_scores
# Script entry: hand-built forward pass through two dense layers with fixed
# weights, so that the intermediate values can be printed and inspected.
if __name__ == "__main__":
    # one input sample with two features
    x = np.array([[0.05, .1]])
    # fixed 2x2 weights for the first layer
    W1 = np.array([[.15, .20], [.25, .30]])

    # fixed 2x2 weights for the second layer
    W2 = np.array([[.40, .45], [.50, .55]])

    # scalar biases for the first and second layer
    b1 = .35
    b2 = 0.60

    # target output for the single sample
    y_true = np.array([[.01, .99]])

    # Layer generation
    # NOTE(review): the first argument of Dense is presumably the number of
    # units — confirm against the Dense signature.
    dense = Dense(2, W1, b1)
    dense2 = Dense(2, W2, b2)

    activation1 = Sigmoid()
    # activation2=Sigmoid()
    # The second activation is created by name through the generic wrapper.
    activation2 = Activation("sigmoid")

    loss_func = MSE()

    # Forward pass
    # Dense -> Activation -> Dense -> Activation -> y_pred

    z1 = dense.forward(x)
    a1 = activation1.forward(z1)
    print("Activation Value:", a1)

    z2 = dense2.forward(a1)
    a2 = activation2.forward(z2)
    # the output of the last activation is the prediction
    y_pred = a2
Example #22
0
 def __init__(self, n_iter=1000, learning_rate=1e-4, intercept=False):
     """Store the training settings; the weights stay None until set.

     A `Sigmoid` instance is created and kept on `self.sigmoid`.
     """
     self.n_iter, self.learning_rate = n_iter, learning_rate
     self.intercept = intercept
     self.weights = None
     self.sigmoid = Sigmoid()
    x = x.reshape(len(x), 1, 28, 28)
    x = x.astype("float32") / 255
    y = np_utils.to_categorical(y)
    y = y.reshape(len(y), 2, 1)
    return x, y


# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# The same preprocessing (limit of 100) is applied to the train and test splits.
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# neural network: a list of layer objects, in the order they are listed
# NOTE(review): 26 = 28 - 3 + 1, which suggests Convolutional takes
# (input shape, kernel size 3, depth 5) without padding — confirm against
# the Convolutional signature.
network = [
    Convolutional((1, 28, 28), 3, 5),
    Sigmoid(),
    # turn the (5, 26, 26) volume into a column vector for the dense layers
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),
    Dense(5 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 2),
    Sigmoid()
]

# training settings
epochs = 20
learning_rate = 0.1

# train
for e in range(epochs):
    error = 0
    for x, y in zip(x_train, y_train):
        # forward
Example #24
0
import numpy as np
import matplotlib.pyplot as plt

from activations import Sigmoid, ReLU, Tanh, Exponential
from fullnetwork import onelayer, fullnetwork
from backpropagation import backpropagation
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import animation

# number of hidden layers
L = 3
# network size for each hidden layer: n[0]=n_1, ..., n[L-1]=n_L (each drawn from [1, 4])
n = np.random.randint(1, 5, size=L)
# activation function
sigma = Sigmoid()
# number of iterations
N = 100

# set up the network
network = fullnetwork(L=L, n=n, activation=sigma)
# set the initial weight and bias
weight, bias = network.setparameter()

# choose one layer index from [1, L-1]
weightindex_startlayer = np.random.randint(1, L, size=None)
# the layer that follows it
weightindex_nextlayer = weightindex_startlayer + 1
# two neuron indices sampled from the start layer, each from [1, width of that layer]
weightindex_neuron_startlayer = np.random.randint(
    1, n[weightindex_startlayer - 1] + 1, size=2)
 def __init__(self):
     """Create the `Sigmoid` instance kept on `self.sigmoid`."""
     self.sigmoid = Sigmoid()
Example #26
0
            loss = self.loss_func.forward(l, o)
            losses.append(self.loss_func.backward(l, o))
            self.total_loss += loss
        layer['out_losses'] = losses


if __name__ == '__main__':
    from estimators import MSELoss as MSE
    from activations import PRelu, Sigmoid
    from initors import GaussInitor

    import os
    TRAIN = True
    model_file = './nnmode.ckpt'
    relu = PRelu(0)
    sigmoid = Sigmoid(0)
    mse = MSE()
    initor = GaussInitor()
    activations = {'prelu': relu}
    NET = NeuralNetwork(input_num=2,
                        lr=0.01,
                        activations=activations,
                        loss_obj=mse,
                        initor_obj=initor)
    if os.path.isfile(model_file) and not TRAIN:
        NET.load_model(model_file)
    else:
        NET.add_layer(
            'hidden',
            2,
            activation='prelu',
Example #27
0
 def __init__(
     self,
     neurons: int,
     activation: Operation = Sigmoid(),
     dropout: float = 1.0,
     weight_init: str = 'standard',
 ):
     """Initialise the base class with `neurons` and store the settings.

     `activation`, `dropout` and `weight_init` are kept on the instance
     under attributes of the same name.
     """
     super().__init__(neurons)
     self.dropout, self.activation, self.weight_init = dropout, activation, weight_init
Example #28
0
              outputDim=1024,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    discriminator.addLayer(
        Dense(inputDim=1024,
              outputDim=512,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    discriminator.addLayer(
        Dense(inputDim=512,
              outputDim=256,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    discriminator.addLayer(
        Dense(inputDim=256,
              outputDim=1,
              activation=Sigmoid(),
              optimizer=optimizer))

    gan = GAN(generator, discriminator)
    print(gan)

    gan.train(dataset,
              loss=MSE(),
              epochs=50,
              metrics=["generator_loss", "discriminator_loss"],
              tensorboard=True,
              callbacks=[])

    gan.save("tryout_gan")
Example #29
0
def main():
    """
    Demonstrate the functionality provided by the framework.

    Builds four 2-25-25-25-2 networks (ReLU, LeakyReLU, Tanh, Sigmoid), trains
    and evaluates the LeakyReLU one with `SGDCV`, then calls `cross_validate`
    on every model and prints the best parameters and score. The loss, the
    optimizer and the parameter grid are chosen from the module-level `args`
    (`args.CE`, `args.Adam`, `args.cross_val`).
    """

    def build(activation_cls, xavier):
        # Layers are instantiated left to right, exactly as in a literal
        # Sequential(Linear, act, Linear, act, Linear, act, Linear) call.
        return Sequential(Linear(2, 25),
                          activation_cls(),
                          Linear(25, 25),
                          activation_cls(),
                          Linear(25, 25),
                          activation_cls(),
                          Linear(25, 2),
                          xavier_init=xavier)

    # Different activation functions; only the sigmoid model is built
    # without automatic Xavier parameter initialization
    relu_model = build(ReLU, True)
    leaky_relu_model = build(LeakyReLU, True)
    tanh_model = build(Tanh, True)
    sigmoid_model = build(Sigmoid, False)

    model_names = ["ReLU", "Leaky", "Tanh", "Sigmoid"]

    train_input, train_target = generate_disc_set(1000)
    test_input, test_target = generate_disc_set(1000)

    # Model training without cross-validation of the optimizer parameters
    warmup = SGDCV(leaky_relu_model, nb_epochs=25)
    warmup.train(train_input, train_target)

    evaluator = Evaluator(leaky_relu_model)

    train_accuracy = evaluator.compute_accuracy(train_input, train_target)
    print("Train accuracy using LeakyReLU: {:.1f}%".format(
        (train_accuracy * 100).item()))
    test_accuracy = evaluator.compute_accuracy(test_input, test_target)
    print("Test accuracy using LeakyReLU: {:.1f}%".format(
        (test_accuracy * 100).item()))

    models = (relu_model, leaky_relu_model, tanh_model, sigmoid_model)

    # Grids explored when cross-validation is requested
    sgd_grid = {"lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]}
    adam_grid = {
        "lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        "b1": [0.9, 0.8],
        "b2": [0.999, 0.888],
        "epsilon": [1e-8, 1e-7, 1e-6],
    }

    # Single-value grids used otherwise, keyed by model name
    adam_fixed = {
        model_name: {"lr": [0.001], "b1": [0.9], "b2": [0.999], "epsilon": [1e-08]}
        for model_name in model_names
    }
    sgd_fixed = {
        model_name: {"lr": [0.01 if model_name == "Sigmoid" else 0.001]}
        for model_name in model_names
    }

    use_mse = not args.CE
    use_sgd = not args.Adam
    cross_validate = args.cross_val

    # Different loss functions
    criterion = LossMSE() if use_mse else LossCrossEntropy()

    for name, model in zip(model_names, models):
        # Pick the cross-validating optimizer class and its parameter tables
        if use_sgd:
            tuner_cls, grid, fixed = SGDCV, sgd_grid, sgd_fixed
        else:
            tuner_cls, grid, fixed = AdamCV, adam_grid, adam_fixed

        tuner = tuner_cls(model, mini_batch_size=10, criterion=criterion)
        values = grid if cross_validate else fixed[name]

        _, best = tuner.cross_validate(values=values)

        if use_sgd:
            print("Best params for model using {} : (lr={:.3f})".format(
                name, best["lr"]))
        else:
            print(
                "Best params for model using {} : (lr={:.3f}, b1={:.3f}, b2={:.3f}, epsilon={:.1e})"
                .format(name, best["lr"], best["b1"], best["b2"], best["epsilon"]))

        print("Best score for model using {} : {:.3f} (+/- {:.3f})".format(
            name, best["mean"], best["std"]))
Example #30
0
class WavenetResidualModule(ModuleBase):
    def __init__(
        self,
        ch_residual,
        ch_dilation,
        dilation,
        kernel_width,
        optimizer=None,
        init="glorot_uniform",
    ):
        """
        A WaveNet-like residual block with causal dilated convolutions.

        *Skip path in* >-------------------------------------------> + --------> *Skip path out*
                          Causal      |--> Tanh --|                  |
        *Main    |--> Dilated Conv1D -|           * --> 1x1 Conv1D --|
         path >--|                    |--> Sigm --|                  |
         in*     |-------------------------------------------------> + --------> *Main path out*
                                     *Residual path*

        On the final block, the output of the skip path is further processed to
        produce the network predictions.

        See van den Oord et al. (2016) at https://arxiv.org/pdf/1609.03499.pdf
        for further details.

        Parameters
        ----------
        ch_residual : int
            The number of output channels for the 1x1 Conv1D layer in the main
            path
        ch_dilation : int
            The number of output channels for the causal dilated Conv1D layer
            in the main path
        dilation : int
            The dilation rate for the causal dilated Conv1D layer in the main
            path
        kernel_width : int
            The width of the causal dilated Conv1D kernel in the main path
        init : str (default: 'glorot_uniform')
            The weight initialization strategy. Valid entries are
            {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
        optimizer : str or `OptimizerBase` instance (default: None)
            The optimization strategy to use when performing gradient updates
            within the `update` method.  If `None`, use the `SGD` optimizer with
            default parameters.
        """
        super().__init__()

        self.init = init
        self.dilation = dilation
        self.optimizer = optimizer
        self.ch_residual = ch_residual
        self.ch_dilation = ch_dilation
        self.kernel_width = kernel_width

        self._init_params()

    def _init_params(self):
        """
        Instantiate the sub-layers of the block and reset the cache of
        derived variables.
        """
        self._dv = {}

        self.conv_dilation = Conv1D(
            stride=1,
            pad="causal",
            init=self.init,
            # use the width requested by the caller; it was previously
            # hard-coded to 2, silently ignoring the constructor argument
            kernel_width=self.kernel_width,
            dilation=self.dilation,
            out_ch=self.ch_dilation,
            optimizer=self.optimizer,
            act_fn=Affine(slope=1, intercept=0),
        )

        # gating nonlinearities applied to the dilated convolution output
        self.tanh = Tanh()
        self.sigm = Sigmoid()
        self.multiply_gate = Multiply(act_fn=Affine(slope=1, intercept=0))

        self.conv_1x1 = Conv1D(
            stride=1,
            pad="same",
            dilation=0,
            init=self.init,
            kernel_width=1,
            out_ch=self.ch_residual,
            optimizer=self.optimizer,
            act_fn=Affine(slope=1, intercept=0),
        )

        self.add_residual = Add(act_fn=Affine(slope=1, intercept=0))
        self.add_skip = Add(act_fn=Affine(slope=1, intercept=0))

    @property
    def parameters(self):
        """A dictionary of the parameters of each component layer."""
        return {
            "components": {
                "conv_1x1": self.conv_1x1.parameters,
                "add_skip": self.add_skip.parameters,
                "add_residual": self.add_residual.parameters,
                "conv_dilation": self.conv_dilation.parameters,
                "multiply_gate": self.multiply_gate.parameters,
            }
        }

    @property
    def hyperparameters(self):
        """
        A dictionary of the module's own hyperparameters together with the
        hyperparameters of each component layer.
        """
        return {
            "layer": "WavenetResidualModule",
            "init": self.init,
            "dilation": self.dilation,
            "optimizer": self.optimizer,
            "ch_residual": self.ch_residual,
            "ch_dilation": self.ch_dilation,
            "kernel_width": self.kernel_width,
            "component_ids": [
                "conv_1x1",
                "add_skip",
                "add_residual",
                "conv_dilation",
                "multiply_gate",
            ],
            "components": {
                "conv_1x1": self.conv_1x1.hyperparameters,
                "add_skip": self.add_skip.hyperparameters,
                "add_residual": self.add_residual.hyperparameters,
                "conv_dilation": self.conv_dilation.hyperparameters,
                "multiply_gate": self.multiply_gate.hyperparameters,
            },
        }

    @property
    def derived_variables(self):
        """
        A dictionary of intermediate values cached during the forward and
        backward passes, plus the derived variables of each component layer.

        Entries that have not been computed yet are reported as `None`.
        """
        dv = {
            "conv_1x1_out": None,
            "conv_dilation_out": None,
            "multiply_gate_out": None,
            "components": {
                "conv_1x1": self.conv_1x1.derived_variables,
                "add_skip": self.add_skip.derived_variables,
                "add_residual": self.add_residual.derived_variables,
                "conv_dilation": self.conv_dilation.derived_variables,
                "multiply_gate": self.multiply_gate.derived_variables,
            },
        }
        # values cached by forward / backward override the `None` defaults
        dv.update(self._dv)
        return dv

    @property
    def gradients(self):
        """A dictionary of the gradients of each component layer."""
        return {
            "components": {
                "conv_1x1": self.conv_1x1.gradients,
                "add_skip": self.add_skip.gradients,
                "add_residual": self.add_residual.gradients,
                "conv_dilation": self.conv_dilation.gradients,
                "multiply_gate": self.multiply_gate.gradients,
            }
        }

    def forward(self, X_main, X_skip=None):
        """
        Compute the module output on a single minibatch.

        Parameters
        ----------
        X_main : numpy array
            The input on the main path. It is fed to the causal dilated
            convolution and also added to the 1x1 convolution output on the
            residual path, so it must be compatible with that output.
        X_skip : numpy array or None (default: None)
            The running sum of skip connections from the preceding blocks.
            If `None` (i.e., this is the first block), it is replaced with
            zeros shaped like the 1x1 convolution output.

        Returns
        -------
        Y_main : numpy array
            The output of the main (residual) path.
        Y_skip : numpy array
            The output of the skip path.
        """
        self.X_main = X_main
        conv_dilation_out = self.conv_dilation.forward(X_main)

        # gated activation unit: tanh(conv) * sigmoid(conv)
        tanh_gate = self.tanh.fn(conv_dilation_out)
        sigm_gate = self.sigm.fn(conv_dilation_out)

        multiply_gate_out = self.multiply_gate.forward([tanh_gate, sigm_gate])
        conv_1x1_out = self.conv_1x1.forward(multiply_gate_out)

        # if this is the first wavenet block, initialize the "previous" skip
        # connection sum to 0
        self.X_skip = np.zeros_like(conv_1x1_out) if X_skip is None else X_skip

        # use the (possibly zero-initialized) `self.X_skip` here; passing the
        # raw `X_skip` argument would hand `None` to the Add layer
        Y_skip = self.add_skip.forward([self.X_skip, conv_1x1_out])
        Y_main = self.add_residual.forward([X_main, conv_1x1_out])

        self._dv["tanh_out"] = tanh_gate
        self._dv["sigm_out"] = sigm_gate
        self._dv["conv_dilation_out"] = conv_dilation_out
        self._dv["multiply_gate_out"] = multiply_gate_out
        self._dv["conv_1x1_out"] = conv_1x1_out
        return Y_main, Y_skip

    def backward(self, dY_skip, dY_main=None):
        """
        Backpropagate the loss gradients through the module.

        Must be called after `forward`, since it relies on the cached
        main-path input and dilated convolution output.

        Parameters
        ----------
        dY_skip : numpy array
            The gradient of the loss with respect to the skip path output.
        dY_main : numpy array or None (default: None)
            The gradient of the loss with respect to the main path output.
            `None` on the last block, where the main path output is unused.

        Returns
        -------
        dX_main : numpy array
            The gradient of the loss with respect to the main path input.
        dX_skip : numpy array
            The gradient of the loss with respect to the skip path input.
        """
        dX_skip, dConv_1x1_out = self.add_skip.backward(dY_skip)

        # if this is the last wavenet block, dY_main will be None. if not,
        # calculate the error contribution from dY_main and add it to the
        # contribution from the skip path
        dX_main = np.zeros_like(self.X_main)
        if dY_main is not None:
            dX_main, dConv_1x1_main = self.add_residual.backward(dY_main)
            # out-of-place add: do not mutate the array owned by `add_skip`
            dConv_1x1_out = dConv_1x1_out + dConv_1x1_main

        dMultiply_out = self.conv_1x1.backward(dConv_1x1_out)
        dTanh_out, dSigm_out = self.multiply_gate.backward(dMultiply_out)

        # both gates read the same convolution output, so their input
        # gradients are summed
        conv_dilation_out = self.derived_variables["conv_dilation_out"]
        dTanh_in = dTanh_out * self.tanh.grad(conv_dilation_out)
        dSigm_in = dSigm_out * self.sigm.grad(conv_dilation_out)
        dDilation_out = dTanh_in + dSigm_in

        conv_back = self.conv_dilation.backward(dDilation_out)
        # out-of-place add: avoids mutating an array returned by
        # `add_residual` and avoids in-place dtype casting errors
        dX_main = dX_main + conv_back

        self._dv["dLdTanh"] = dTanh_out
        self._dv["dLdSigmoid"] = dSigm_out
        self._dv["dLdConv_1x1"] = dConv_1x1_out
        self._dv["dLdMultiply"] = dMultiply_out
        self._dv["dLdConv_dilation"] = dDilation_out
        return dX_main, dX_skip