Code example #1
def test():
    """ Testing stuff to see it works properly """
    a = Random()
    b = Sequential()
    a.enter_name("Player Random")
    b.enter_name("Player Sequential")

    game = SingleGame(a, b)
    game.perform_game()
    game.show_result()
Code example #2
def __init__(self, hidden_layers):
    # Input layer (2 -> 25), then alternating hidden Linear/ReLu blocks
    items = []
    items.append(Linear(2, 25))
    items.append(ReLu())
    for _ in range(hidden_layers - 1):
        items.append(Linear(25, 25))
        items.append(ReLu())
    # Squash with tanh before the final output layer (25 -> 2)
    items.append(tanh())
    items.append(Linear(25, 2))
    self.model = Sequential(items)
Code example #3
def choose_players():
    """ Let user choose players and player names in textual interface"""

    # Available picks
    player_type_list = ["H", "MC", "S", "R"]
    players = []
    while len(players) < 2:  # Until the player count is two
        i = len(players) + 1  # Number of current player
        while True:
            player_type_input = input(f"Player {i}: ")

            if player_type_input not in player_type_list:
                print("Invalid input! Please try again.")
                continue  # Loop will continue until valid input

            break

        if player_type_input == "H":
            while True:
                try:
                    memo = int(input("What will memory of Historian be? "))
                except:
                    continue
                if 1 <= memo <= 20:
                    break
            player = Historian(memo)

        if player_type_input == "MC":
            player = MostCommon()
        if player_type_input == "S":
            player = Sequential()
        if player_type_input == "R":
            player = Random()

        player_name = input("Player name: ")
        player.enter_name(player_name)

        players.append(player)
        i += 1

    return players  # List with players to enter tournament
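A usage sketch, assuming the SingleGame class from code example #1 lives in the same project (the snippets share player classes, but the wiring below is illustrative):

# Sketch: wire choose_players() into a single game (assumes SingleGame from example #1)
p1, p2 = choose_players()
game = SingleGame(p1, p2)
game.perform_game()
game.show_result()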
Code example #4
    def declare_player_types(self):
        """
         # After the choices have been made for P1 and P2, do the appropriate init for them.
        :return:
        """
        player = None
        for i, choice in enumerate(self.__choices__):
            if choice == "random":
                player = RandomPlayer()

            elif choice == "sequential":
                print(f"Player {i+1} chose Sequential, decide your sequence")
                sequence = input("Sequence: ").split(", ")
                player = Sequential(sequence)

            elif choice == "mostcommon":
                player = MostCommon()

            elif choice == "historian":
                print(f"Player {i+1} chose Sequential, decide your remembering (1,2,3)")
                r = int(input("Remembering: "))
                player = Historian(r=r)

            else:
                raise ValueError(f"Unknown player type: {choice}")

            self.__players__.append(player)
Code example #5
class Neural_network(Module):
    def __init__(self, hidden_layers):
        # Input layer (2 -> 25), then alternating hidden Linear/ReLu blocks
        items = []
        items.append(Linear(2, 25))
        items.append(ReLu())
        for _ in range(hidden_layers - 1):
            items.append(Linear(25, 25))
            items.append(ReLu())
        # Squash with tanh before the final output layer (25 -> 2)
        items.append(tanh())
        items.append(Linear(25, 2))
        self.model = Sequential(items)

    def forward(self, x):
        return self.model.forward(x)

    def backward(self, grad_output):
        return self.model.backward(grad_output)

    def set_zero_grad(self):
        self.model.set_zero_grad()

    def optimisation_step(self, optimizer):
        self.model.optimisation_step(optimizer)
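For reference, a minimal sketch of the same architecture in standard PyTorch; this is a comparison aid, not part of the original project:

import torch
from torch import nn

def build_reference_net(hidden_layers):
    # Mirrors Neural_network: Linear(2, 25) + ReLU, (hidden_layers - 1)
    # blocks of Linear(25, 25) + ReLU, then Tanh before Linear(25, 2)
    layers = [nn.Linear(2, 25), nn.ReLU()]
    for _ in range(hidden_layers - 1):
        layers += [nn.Linear(25, 25), nn.ReLU()]
    layers += [nn.Tanh(), nn.Linear(25, 2)]
    return nn.Sequential(*layers)

net = build_reference_net(hidden_layers=3)
out = net(torch.randn(8, 2))  # forward pass on a batch of 8 points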
Code example #6
def train(epochs, batch_size, lr, verbose):
    """Main method that trains the network"""
    # autograd globally off
    torch.set_grad_enabled(False)
    # generate training and testing datasets
    train_data, train_label = generate_data()
    test_data, test_label = generate_data()
    # normalize data to be centered at 0
    train_data, test_data = normalize(train_data, test_data)

    if verbose:
        print("--- Dataset ---")
        print("Train X: ", train_data.size(), " | Train y: ",
              train_label.size())
        print(" Test X: ", test_data.size(), " |  Test y: ", test_label.size())

    layers = []
    # input layer (2 input units)
    linear1 = Linear(2, 25, bias=True, weight_init=xavier_uniform)

    # 3 hidden layers (each 25 units)
    linear2 = Linear(25, 25, bias=True, weight_init=xavier_uniform)
    linear3 = Linear(25, 25, bias=True, weight_init=xavier_uniform)
    linear4 = Linear(25, 25, bias=True, weight_init=xavier_uniform)

    # output layer (2 output units)
    linear5 = Linear(25, 2, bias=True, weight_init=xavier_uniform)

    layers.append(linear1)
    layers.append(Relu())
    layers.append(linear2)
    layers.append(Relu())
    layers.append(linear3)
    layers.append(Relu())
    layers.append(linear4)
    layers.append(Tanh())
    layers.append(linear5)

    model = Sequential(layers)
    if verbose:
        print("Number of model parameters: {}".format(
            sum([len(p) for p in model.param()])))

    criterion = MSE()
    optimizer = SGD(model, lr=lr)

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []
    train_errors, test_errors = [], []

    if verbose: print("--- Training ---")
    for epoch in range(1, epochs + 1):
        if verbose: print("Epoch: {}".format(epoch))

        # TRAINING
        for batch_idx in range(0, train_data.size(0), batch_size):
            # axis 0, start from batch_idx until batch_idx+batch_size
            output = model.forward(train_data.narrow(0, batch_idx, batch_size))

            # Calculate loss
            loss = criterion.forward(
                output, train_label.narrow(0, batch_idx, batch_size))
            train_losses.append(loss)
            if verbose: print("Train Loss: {:.2f}".format(loss.item()))

            # put to zero weights and bias
            optimizer.zero_grad()

            ## Backpropagation
            # Calculate grad of loss
            loss_grad = criterion.backward()

            # Grad of the model
            model.backward(loss_grad)

            # Update parameters
            optimizer.step()

        train_prediction = model.forward(train_data)
        acc = accuracy(train_prediction, train_label)
        train_accuracies.append(acc)
        train_errors.append(1 - acc)
        if verbose: print("Train Accuracy: {:.2f}".format(acc.item()))

        # EVALUATION
        for batch_idx in range(0, test_data.size(0), batch_size):
            # axis 0, start from batch_idx until batch_idx+batch_size
            output = model.forward(test_data.narrow(0, batch_idx, batch_size))

            # Calculate loss
            loss = criterion.forward(
                output, test_label.narrow(0, batch_idx, batch_size))
            test_losses.append(loss)
            if verbose: print("Test Loss: {:.2f}".format(loss.item()))

        test_prediction = model.forward(test_data)
        acc = accuracy(test_prediction, test_label)
        test_accuracies.append(acc)
        test_errors.append(1 - acc)
        if verbose: print("Test Accuracy: {:.2f}".format(acc.item()))

    return train_losses, test_losses, train_accuracies, test_accuracies, train_errors, test_errors
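One caveat in the batch loops above: narrow(0, batch_idx, batch_size) raises an error whenever the last batch is shorter than batch_size. A small guard (a sketch, not in the original) keeps the loop valid for any dataset size:

for batch_idx in range(0, train_data.size(0), batch_size):
    # Clamp the final batch so narrow() never reads past the end
    length = min(batch_size, train_data.size(0) - batch_idx)
    output = model.forward(train_data.narrow(0, batch_idx, length))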
Code example #7
def run_all_model(train_input,
                  train_target,
                  test_input,
                  test_target,
                  Sample_number,
                  save_plot=False):

    # Define constants used throughout the tests
    hidden_nb = 25
    std = 0.1
    eta = 3e-1
    batch_size = 200
    epochs_number = 1000

    # Model 1. No dropout; constant learning rate (SGD)
    print('\nModel 1: Optimizer: SGD; No dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model1'

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_1 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_1.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)

    # Train model
    my_loss_1 = train_model(model_1, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_1_perf = evaluate_model(model_1,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_1,
                                  save_plot,
                                  mname=mname)

    # Model 2. No dropout; decreasing learning rate (DecreaseSGD)
    print('\nModel 2: Optimizer: DecreaseSGD; No dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model2'

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_2 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_2.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = DecreaseSGD(eta)

    # Train model
    my_loss_2 = train_model(model_2, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_2_perf = evaluate_model(model_2,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_2,
                                  save_plot,
                                  mname=mname)

    # Model 3. No dropout; Adam Optimizer
    print('\nModel 3: Optimizer: Adam; No dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model3'

    # Custom hyperparameters
    eta_adam = 1e-3
    epochs_number_adam = 500

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_3 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_3.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Adam(eta_adam, 0.9, 0.99, 1e-8)

    # Train model
    my_loss_3 = train_model(model_3, train_input, train_target, optimizer,
                            epochs_number_adam, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_3_perf = evaluate_model(model_3,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_3,
                                  save_plot,
                                  mname=mname)

    # PLOT TO COMPARE OPTIMIZERS
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=1)
        plt.plot(range(0, epochs_number), my_loss_2, linewidth=1)
        plt.plot(range(0, epochs_number_adam), my_loss_3, linewidth=1)
        plt.legend(["SGD", "Decreasing SGD", "Adam"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_optimizers.pdf', bbox_inches='tight')
        plt.close(fig)

    # Model 4. Dropout; SGD
    print('\nModel 4: Optimizer: SGD; Dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model4'

    # Define structure of the network
    dropout = 0.15

    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb, dropout=dropout)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb, dropout=dropout)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_4 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_4.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)

    # Train model
    my_loss_4 = train_model(model_4, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_4_perf = evaluate_model(model_4,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_4,
                                  save_plot,
                                  mname=mname)

    # PLOT TO COMPARE DROPOUT AND NO DROPOUT
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=1)
        plt.plot(range(0, epochs_number), my_loss_4, linewidth=1)
        plt.legend(["Without Dropout", "With Dropout"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_dropout.pdf', bbox_inches='tight')
        plt.close(fig)

    print('\nEvaluation of different activation functions\n')

    # Model 5. No Dropout; SGD; Tanh
    print('\nModel 5: Optimizer: SGD; No dropout; Tanh; CrossEntropy')

    # Define model name for plots
    mname = 'Model5'

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    tanh_1 = Tanh()
    linear_2 = Linear(hidden_nb, hidden_nb)
    tanh_2 = Tanh()
    linear_3 = Linear(hidden_nb, hidden_nb)
    tanh_3 = Tanh()
    linear_4 = Linear(hidden_nb, 2)

    model_5 = Sequential(linear_1,
                         tanh_1,
                         linear_2,
                         tanh_2,
                         linear_3,
                         tanh_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_5.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)

    # Train model
    my_loss_5 = train_model(model_5, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_5_perf = evaluate_model(model_5,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_5,
                                  save_plot,
                                  mname=mname)

    # Model 6. Xavier Initialization
    print(
        '\nModel 6: Optimizer: SGD; No dropout; Tanh; Xavier initialization; CrossEntropy'
    )

    # Define model name for plots
    mname = 'Model6'

    # Define network structure
    linear_1 = Linear(2, hidden_nb)
    tanh_1 = Tanh()
    linear_2 = Linear(hidden_nb, hidden_nb)
    tanh_2 = Tanh()
    linear_3 = Linear(hidden_nb, hidden_nb)
    tanh_3 = Tanh()
    linear_4 = Linear(hidden_nb, 2)

    model_6 = Sequential(linear_1,
                         tanh_1,
                         linear_2,
                         tanh_2,
                         linear_3,
                         tanh_3,
                         linear_4,
                         loss=CrossEntropy())

    model_6.xavier_parameters()
    optimizer = Sgd()

    # Train model
    my_loss_6 = train_model(model_6, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_6_perf = evaluate_model(model_6,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_6,
                                  save_plot,
                                  mname=mname)

    # Model 7. Sigmoid
    print('\nModel 7: Optimizer: SGD; No dropout; Sigmoid; CrossEntropy')

    # Define model name for plots
    mname = 'Model7'

    # Define parameter for sigmoid activation
    p_lambda = 0.1

    # Define network structure
    linear_1 = Linear(2, hidden_nb)
    sigmoid_1 = Sigmoid(p_lambda)
    linear_2 = Linear(hidden_nb, hidden_nb)
    sigmoid_2 = Sigmoid(p_lambda)
    linear_3 = Linear(hidden_nb, hidden_nb)
    sigmoid_3 = Sigmoid(p_lambda)
    linear_4 = Linear(hidden_nb, 2)

    model_7 = Sequential(linear_1,
                         sigmoid_1,
                         linear_2,
                         sigmoid_2,
                         linear_3,
                         sigmoid_3,
                         linear_4,
                         loss=CrossEntropy())

    model_7.normalize_parameters(mean=0.5, std=1)
    optimizer = Sgd(eta=0.5)

    # Train model
    my_loss_7 = train_model(model_7, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_7_perf = evaluate_model(model_7,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_7,
                                  save_plot,
                                  mname=mname)

    # PLOT TO COMPARE EFFECT OF DIFFERENT ACTIVATIONS
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=0.5)
        plt.plot(range(0, epochs_number), my_loss_5, linewidth=0.5, alpha=0.8)
        plt.plot(range(0, epochs_number), my_loss_6, linewidth=0.5, alpha=0.8)
        plt.plot(range(0, epochs_number), my_loss_7, linewidth=0.5)
        plt.legend(["Relu", "Tanh", "Tanh (Xavier)", "Sigmoid"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_activations.pdf', bbox_inches='tight')
        plt.close(fig)

    print('\nEvaluation of base model with MSE loss\n')

    # Model 8. MSE loss
    print('\nModel 8: Optimizer: SGD; No dropout; Relu; MSE')

    # Define model name for plots
    mname = 'Model8'
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    loss = LossMSE()

    model_8 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=loss)

    model_8.normalize_parameters(mean=0, std=std)
    optimizer = Sgd(eta)

    # Train model
    my_loss_8 = train_model(model_8, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_8_perf = evaluate_model(model_8,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_8,
                                  save_plot,
                                  mname=mname)

    print('Evaluation done!')

    train_loss = torch.tensor([
        model_1_perf[0], model_2_perf[0], model_3_perf[0], model_4_perf[0],
        model_5_perf[0], model_6_perf[0], model_7_perf[0], model_8_perf[0]
    ])
    train_error = torch.tensor([
        model_1_perf[1], model_2_perf[1], model_3_perf[1], model_4_perf[1],
        model_5_perf[1], model_6_perf[1], model_7_perf[1], model_8_perf[1]
    ])
    test_loss = torch.tensor([
        model_1_perf[2], model_2_perf[2], model_3_perf[2], model_4_perf[2],
        model_5_perf[2], model_6_perf[2], model_7_perf[2], model_8_perf[2]
    ])
    test_error = torch.tensor([
        model_1_perf[3], model_2_perf[3], model_3_perf[3], model_4_perf[3],
        model_5_perf[3], model_6_perf[3], model_7_perf[3], model_8_perf[3]
    ])

    return train_loss, train_error, test_loss, test_error
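A sketch of how the four returned tensors might be consumed; the call mirrors the signature above, and the printout format is illustrative:

# Hypothetical summary of the returned per-model metrics
train_loss, train_error, test_loss, test_error = run_all_model(
    train_input, train_target, test_input, test_target, Sample_number)
for i in range(train_loss.size(0)):
    print(f"Model {i + 1}: test loss {test_loss[i].item():.3f}, "
          f"test error {test_error[i].item():.3f}")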
Code example #8
def model_tanh():
    return Sequential(Linear(2, 25), Tanh(), Linear(25, 25),
                      Tanh(), Linear(25, 25), Tanh(), Linear(25, 2))
Code example #9
def model_relu():
    return Sequential(Linear(2, 25), Relu(), Linear(25, 25),
                      Relu(), Linear(25, 25), Relu(), Linear(25, 2))
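The two factories build the same four-layer network and differ only in the activation, so they can sit behind a single switch; a minimal sketch:

# Hypothetical selector between the two architectures
def build_model(activation="relu"):
    return model_relu() if activation == "relu" else model_tanh()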
Code example #10
    # Create training data
    np.random.seed(3010)
    mean = [0, 0]
    cov = [[1, 0], [0, 1]]
    x, y = np.random.multivariate_normal(mean, cov, NUMBER_SAMPLES).T
    x_train = np.concatenate((x.reshape(NUMBER_SAMPLES, 1), y.reshape(NUMBER_SAMPLES, 1)), axis=1)
    y_train = np.where(x >= 0, 1.0, 0.0).reshape(NUMBER_SAMPLES, 1)

    # Scatter plot of the data points
    if PLOT:
        plt.scatter(x=x_train[:, 0], y=x_train[:, 1], c=np.squeeze(y_train))
        plt.axis('equal')
        plt.show()

    # Model
    model = Sequential()
    model.add(layer_name='input', n_unit=2, activation=None)
    model.add(layer_name='dense', n_unit=3, activation='sigmoid')
    model.add(layer_name='dense', n_unit=2, activation='tanh')
    model.add(layer_name='output', n_unit=1, activation='sigmoid')

    # Training model
    t1 = time.time()
    loss = model.fit(x_train, y_train, epochs=EPOCHS, learning_rate=LEARNING_RATE)
    t2 = time.time()
    print("time to train = ", t2 - t1)

    # Plot loss graph
    if PLOT:
        plt.figure()
        plt.plot(range(EPOCHS), loss)
Code example #11
File: test.py  Project: Neclow/ee559-project2
def main():
    '''
    Main function.
    Runs a single training, or 10 trials with default model, loss function and optimizer
    '''

    print('Default run: single training with default net, MSE loss and SGD.')
    print('Available activation functions: ReLU, tanh.')
    print(
        'Available criteria: "mse" for MSE loss (default), "cross" for cross-entropy loss'
    )
    print(
        'Available optimizers: "sgd" for SGD (default), "mom" for SGD + momentum, "adam" for Adam optimization'
    )

    print('Recommended learning rates: ')
    print('SGD: 1e-2 with MSE loss, 5e-4 with Cross-Entropy loss')
    print('SGD + Momentum: 1e-3 with MSE loss, 1e-4 with Cross-Entropy loss')
    print('Adam: 1e-3 \n')

    # Load default model
    net = Sequential([
        Linear(2, 25),
        ReLU(),
        Linear(25, 25),
        ReLU(),
        Linear(25, 25),
        ReLU(),
        Linear(25, 2)
    ])

    print(net)

    # Load default criterion and optimizer, with corresponding LR
    criterion = 'mse'
    optimizer = 'sgd'
    eta = 1e-2

    # Running mode: 'train' for single training, 'trial' for several trainings
    mode = 'train'
    #mode = 'trial'

    print(f'\n Selected mode: {mode} \n')
    time.sleep(1)

    if mode == 'train':
        print('To visualize data, change flag "plot_data" to True.')
        print(
            'To visualize training loss and predictions, change flag "plot_training" to True.'
        )
        plot_data = True
        plot_training = True
        run_train(net,
                  criterion,
                  optimizer,
                  eta,
                  plot_data=plot_data,
                  plot_training=plot_training)
    elif mode == 'trial':
        n_trials = 10
        trial(net,
              n_trials=n_trials,
              input_criterion=criterion,
              input_optimizer=optimizer,
              eta=eta,
              verbose=True)
    else:
        raise ValueError(
            'Running mode not found. Try "train" for simple train, "trial" for full trial.'
        )
Code example #12
File: main.py  Project: HarshdeepGupta/mini-Keras
def main():
    model = Sequential()
    model.add(Layer(size=2))                        # Input layer
    model.add(Layer(size=6, activation='relu'))     # Hidden layer with 6 neurons
    model.add(Layer(size=6, activation='relu'))     # Hidden layer with 6 neurons
    model.add(Layer(size=2, activation='softmax'))  # Output layer
    model.compile(learning_rate=0.1)
    # Learn the XOR mapping (targets are one-hot encoded)
    X = np.array([[1, 0], [0, 1], [0, 0], [1, 1]])
    Y = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])

    print(model.predict(X))  # Predictions before training
    model.fit(X, Y, iterations=10000)
    print(model.predict(X))  # Predictions after training
Code example #13
import sys
import torch

sys.path.append('dl/')
from Sequential import Sequential
from Linear import Linear
from Functionnals import Relu
import Optimizer
import Criterion
from helpers import train, generate_disc_data, compute_accuracy

# Set the default tensor dtype
torch.set_default_dtype(torch.float32)

# Disable autograd
torch.set_grad_enabled(False)

# Create the model
model = Sequential(Linear(2, 25), Relu(), Linear(25, 25), Relu(),
                   Linear(25, 25), Relu(), Linear(25, 2))

# Create datasets with one-hot labels for MSE
train_input, train_target = generate_disc_data(one_hot_labels=True)
test_input, test_target = generate_disc_data(one_hot_labels=True)

# Normalize the data
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

# Define the loss
criterion = Criterion.MSE()

# Define the optimizer
optim = Optimizer.SGD(parameters=model.param(), lr=1e-1)
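The snippet stops after constructing the optimizer. A plausible continuation, assuming the imported train and compute_accuracy helpers follow the signatures their names suggest (hypothetical; the real signatures live in helpers.py):

# Hypothetical calls -- check helpers.py for the actual signatures
train(model, criterion, optim, train_input, train_target, nb_epochs=50)
print("train accuracy:", compute_accuracy(model, train_input, train_target))
print("test accuracy: ", compute_accuracy(model, test_input, test_target))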