def runNN(n_hidden_max):

    losses = []  #training set
    losses_0 = []  #validation set

    ## swap in range(1, n_hidden_max + 1) to sweep over hidden-layer sizes
    for n_hidden in range(2, 3):  # range(1, n_hidden_max + 1)
        #print(n_hidden)
        eta = 0.001
        ndata = 50
        #n_hidden = 3

        # Training data cluster parameters (A, B, and extra cluster C)
        mA, sigmaA = [0.0, -0.1], 0.3
        mB, sigmaB = [1.0, 0.3], 0.2
        mC, sigmaC = [-1.0, -0.3], 0.2
        #mD, sigmaD = [3, 4], 0.7

        ## A second cluster C is added so the combined class B is not linearly separable.
        ## TODO(@Bryan): plug in the correct data set.
        A = _3_1_1_lin_seperable_data.generateData(2, mA, sigmaA,
                                                   math.floor(ndata * 2))
        B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)
        C = _3_1_1_lin_seperable_data.generateData(2, mC, sigmaC, ndata)
        # D = _3_1_1_lin_seperable_data.generateData(2, mD, sigmaD, ndata)
        B = np.hstack([B, C])
        # B = np.hstack([B,D])

        # Create validation set (~25% of each class); chosen samples are
        # removed from the training set so they do not also appear in training
        A_0 = A[:, 0]
        B_0 = B[:, 0]
        A = np.delete(A, 0, axis=1)
        B = np.delete(B, 0, axis=1)
        for _i in range(int(round(ndata * 2 * .25)) - 1):
            _e = np.random.randint(A.shape[1])
            A_0 = np.vstack([A_0, A[:, _e]])
            A = np.delete(A, _e, axis=1)
            B_0 = np.vstack([B_0, B[:, _e]])
            B = np.delete(B, _e, axis=1)

        A_0 = np.transpose(A_0)
        B_0 = np.transpose(B_0)

        X = np.hstack([A, B])
        X_0 = np.hstack([A_0, B_0])

        # Add bias term
        X = np.vstack([X, np.ones(X.shape[1])])
        X_0 = np.vstack([X_0, np.ones(X_0.shape[1])])

        # Targets
        T_A, T_B = np.ones(A.shape[1]), -1 * np.ones(B.shape[1])
        T = np.hstack([T_A, T_B])
        T_A_0, T_B_0 = np.ones(A_0.shape[1]), -1 * np.ones(B_0.shape[1])
        T_0 = np.hstack([T_A_0, T_B_0])

        # Wji = weights from input layer i to hidden layer j
        # Wkj = weights from hidden layer j to output layer k
        # Wkj's "+ 1" accounts for the weight on the hidden layer's bias unit
        Wji = _3_1_2_single_layer_perceptron.initializeWeights(
            X.shape[0], n_hidden)
        Wkj = _3_1_2_single_layer_perceptron.initializeWeights(n_hidden + 1, 1)

        layer1 = Layer(activation=v_sigmoid, d_activation=v_d_sigmoid)
        layer2 = Layer(activation=v_sigmoid, d_activation=v_d_sigmoid)

        # For matplotlib window
        maxX = np.max(X[0]) + 0.5
        minX = np.min(X[0]) - 0.5
        maxY = np.max(X[1]) + 0.5
        minY = np.min(X[1]) - 0.5

        eta = 0.01  # overrides the eta above; this is the learning rate actually used

        plt.ion()
        start = time.time()

        for epoch in range(1000):

            def batch_learning(Wji, Wkj):
                # (diagram dimensions assume n_hidden = 4)
                #         +-------+                +-------+
                #  X(3x1)+>Affine1+--->Hj_in(4x1)+->Sigmoid+->Hj_out(4x1)
                #         +-------+                +-------+
                #                                  +---------+
                #          Wji(4x3)                |D_sigmoid+->Hj_phid(4x1)
                #                                  +---------+
                h_out = layer1.forward(Wji, X, pad=True)
                #         +-------+                +-------+
                #  Hj_out+>Affine2+--->Yk_in(1x1)+->Sigmoid+->Yk_out(1x1)
                #  Biased +-------+                +-------+
                #  (5x1)                           +---------+
                #          Wkj(1x5)                |D_sigmoid+->Yk_phid(1x1)
                #                                  +---------+

                y_out = layer2.forward(Wkj, h_out, pad=False)

                classification_percent = np.sum(np.sign(y_out) == T) / len(T)

                error = _3_1_2_single_layer_perceptron.error(T, y_out)
                error = error / len(T)
                losses.append(error)

                print(
                    f"Epoch: {epoch} Error: {error} Percentage: {classification_percent}"
                )

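                # Backward pass. d_y_out = y_out - T is dE/dy_out assuming the
                # usual squared error E = 1/2 * sum((y - t)^2); layer2.d_in
                # then scales it by phi'(y_in) to give the delta at the
                # layer's input.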
                d_y_out = y_out - T
                d_y_in = layer2.d_in(d_y_out)
                DELTA_Wkj = DELTA(eta, d_y_in, h_out)

                d_h_out = layer1.d_out_hidden(d_y_in, Wkj)
                d_h_in = layer1.d_in(d_h_out)
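                # d_h_in still carries the extra bias row from layer1's padded
                # output; the [:-1, :] slice below drops that row's delta so
                # the update shape matches Wji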
                DELTA_Wji = DELTA(eta, d_h_in, X)[:-1, :]

                h_out_0 = layer1.forward(Wji, X_0, pad=True)
                y_out_0 = layer2.forward(Wkj, h_out_0, pad=False)
                error_0 = _3_1_2_single_layer_perceptron.error(T_0, y_out_0)
                error_0 = error_0 / len(T_0)
                losses_0.append(error_0)

                Wji = Wji + DELTA_Wji
                Wkj = Wkj + DELTA_Wkj
                return Wji, Wkj

            Wji, Wkj = batch_learning(Wji, Wkj)

            #print(max(np.max(DELTA_Wji), np.max(DELTA_Wkj)))

            def sequential_learning(Wji, Wkj):

                Wji_old = np.copy(Wji)
                Wkj_old = np.copy(Wkj)

                classification_percent = 0.0
                error = 0.0

                for i in range(len(T)):

                    X_sample = np.transpose(np.atleast_2d(X[:, i]))
                    T_sample = np.atleast_2d(T[i])

                    #pass forward layer 1
                    layer_1_IN = _3_1_2_single_layer_perceptron.forwardPass(
                        Wji, X_sample)
                    h_out = v_sigmoid(layer_1_IN)
                    layer_1_PHID = v_d_sigmoid(layer_1_IN)

                    #bias layer 1
                    padding = np.ones(h_out.shape[1])
                    h_out = np.vstack([h_out, padding])
                    layer_1_PHID = np.vstack([layer_1_PHID, padding])

                    #pass forward layer 2
                    layer_2_IN = _3_1_2_single_layer_perceptron.forwardPass(
                        Wkj, h_out)
                    y_out = v_sigmoid(layer_2_IN)
                    layer_2_PHID = v_d_sigmoid(layer_2_IN)

                    classification_percent += np.sum(
                        np.sign(y_out) == T_sample)

                    error += _3_1_2_single_layer_perceptron.error(
                        T_sample, y_out)

                    #backprop

                    d_y_out = y_out - T_sample
                    d_y_in = d_y_out * layer_2_PHID
                    DELTA_Wkj = DELTA(eta, d_y_in, h_out)

                    d_h_out = np.matmul(np.transpose(Wkj), d_y_in)
                    d_h_in = d_h_out * layer_1_PHID
                    DELTA_Wji = DELTA(eta, d_h_in, X_sample)[:-1, :]

                    Wji = Wji + DELTA_Wji
                    Wkj = Wkj + DELTA_Wkj

                print(
                    f"Epoch: {epoch} Error: {error} Percentage: {classification_percent/len(T)}"
                )
                error = error / len(T)
                losses.append(error)

                # Validation error uses the weights from the start of the
                # epoch (Wji_old, Wkj_old)
                h_out_0 = layer1.forward(Wji_old, X_0, pad=True)
                y_out_0 = layer2.forward(Wkj_old, h_out_0, pad=False)
                error_0 = _3_1_2_single_layer_perceptron.error(T_0, y_out_0)
                error_0 = error_0 / len(T_0)
                losses_0.append(error_0)

                return Wji, Wkj

            #Wji, Wkj = sequential_learning(Wji, Wkj)

            __x = np.arange(-5, 5, 0.5)

            def decisionBoundary(x, normal):
                # 0 = w0x + w1y + w2
                # y = (-w0x - w2) / w1
                if normal.shape[0] == 3:
                    return (-normal[0] * x - normal[2]) / normal[1]
                else:
                    return (-normal[0] * x) / normal[1]

            def plot(n_hidden_layer):
                plt.clf()
                plt.ylim(minY, maxY)
                plt.xlim(minX, maxX)

                plt.plot(A[0], A[1], "ro", B[0], B[1], "bo")
                for _i in range(n_hidden_layer):
                    plt.plot(__x,
                             decisionBoundary(__x, np.transpose(Wji[_i, :])),
                             label=f"hidden node: {_i}")
                plt.title("Grid")
                plt.xlabel("X-Coord")
                plt.ylabel("Y-Coord")
                plt.legend()
                plt.show()
                plt.pause(0.0000001)

            #plot(n_hidden)

        elapsed = time.time() - start

        print(f"\nElapsed: {elapsed}")

        print(Wji)
        print(Wkj)

        plt.ioff()
        if n_hidden == 2:
            plot(n_hidden)

    return losses, losses_0
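

# v_sigmoid / v_d_sigmoid above are imported from elsewhere in this repo.
# Since the targets are +-1 and np.sign() is applied to the network output,
# a symmetric (tanh-like) sigmoid is the natural fit. A minimal sketch under
# that assumption; the _sketch_ names are illustrative, not the real helpers:

import numpy as np


def _sketch_sigmoid(x):
    # phi(x) = 2 / (1 + exp(-x)) - 1, output in (-1, 1)
    return 2.0 / (1.0 + np.exp(-x)) - 1.0


def _sketch_d_sigmoid(x):
    # phi'(x) = (1 + phi(x)) * (1 - phi(x)) / 2
    phi = _sketch_sigmoid(x)
    return (1.0 + phi) * (1.0 - phi) / 2.0

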
def main():

    train_set_percentage = TRAIN_SET_PERCENTAGE

    hidden_layer_nodes = HIDDEN_LAYER_NODES

    # Data Set
    patterns, target = makeData()

    # Visualize the data set before training
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(xs=patterns[0], ys=patterns[1], zs=target.flatten())
    plt.show()

    # Subsample Train and Test sets
    if MAKE_VALIDATE_SET:
        num_samples = target.shape[1]
        args = np.arange(num_samples)
        np.random.shuffle(args)
        split_idx = int(num_samples * (train_set_percentage))
        train_args, test_args = args[:split_idx], args[split_idx:]
        patterns_test = patterns[:, test_args]
        patterns = patterns[:, train_args]
        target_test, target = target[:, test_args], target[:, train_args]

    W_1 = _3_1_2_single_layer_perceptron.initializeWeights(
        3, hidden_layer_nodes)
    W_2 = _3_1_2_single_layer_perceptron.initializeWeights(
        hidden_layer_nodes + 1, 1)

    layer1 = _3_2_1.Layer(activation=_3_2_1.v_sigmoid,
                          d_activation=_3_2_1.v_d_sigmoid)
    layer2 = _3_2_1.Layer(activation=np.vectorize(lambda x: x),
                          d_activation=np.vectorize(lambda x: 1))

    train_loss = []
    test_loss = []

    for epoch in range(EPOCHS):

        ## Training
        h_out = layer1.forward(W_1, patterns, pad=True)
        y_out = layer2.forward(W_2, h_out, pad=False)
        error = _3_1_2_single_layer_perceptron.error(target, y_out)
        d_y_out = y_out - target
        d_y_in = layer2.d_in(d_y_out)
        DELTA_W_2 = _3_2_1.DELTA(ETA, d_y_in, h_out)
        d_h_out = layer1.d_out_hidden(d_y_in, W_2)
        # trim off last term of d_h_in since h_in was padded with bias
        d_h_in = layer1.d_in(d_h_out)[:-1]
        DELTA_W_1 = _3_2_1.DELTA(ETA, d_h_in, patterns)
        train_loss.append(error)

        ## Test
        test_error = 0
        if MAKE_VALIDATE_SET:
            h_out_test = layer1.forward(W_1, patterns_test, pad=True)
            y_out_test = layer2.forward(W_2, h_out_test, pad=False)
            test_error = _3_1_2_single_layer_perceptron.mse(
                target_test, y_out_test)
            test_loss.append(test_error)

        print(f"Epoch: {epoch} Train error: {error} Test Error: {test_error}")

        W_1 = W_1 + DELTA_W_1
        W_2 = W_2 + DELTA_W_2

        deltaMax = max(np.max(np.abs(DELTA_W_1)), np.max(np.abs(DELTA_W_2)))
        if deltaMax < 10**-5:
            break

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(xs=patterns[0], ys=patterns[1], zs=y_out.flatten())
    plt.show()

    return train_loss, test_loss
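

# The DELTA helper from _3_2_1 is applied above as W = W + DELTA(eta, d, x),
# so it must already contain the negative gradient step. A minimal sketch
# under that assumption; _sketch_DELTA is an illustrative name, not the
# actual helper:

import numpy as np


def _sketch_DELTA(eta, d_in, x):
    # Generalized delta rule: dW = -eta * delta @ x^T, with delta the error
    # signal at the layer input (n_out, N) and x the layer input (n_in, N);
    # the result has shape (n_out, n_in), matching the weight matrix.
    return -eta * np.matmul(d_in, np.transpose(x))

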
def runNN():

    eta = 0.001
    ndata = 100
    n_hidden = 4

    vsigmoid = np.vectorize(sigmoid)
    vd_sigmoid = np.vectorize(d_sigmoid)

    # Training data cluster parameters (A, B, and extra cluster C)
    mA, sigmaA = [1.0, 0.5], 0.5
    mB, sigmaB = [-2.0, 0.0], 0.5
    mC, sigmaC = [5, 0.0], 0.5

    ## A second cluster C is added so the combined class B is not linearly separable.
    ## TODO(@Bryan): plug in the correct data set.
    A = _3_1_1_lin_seperable_data.generateData(2, mA, sigmaA, ndata)
    B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)
    C = _3_1_1_lin_seperable_data.generateData(2, mC, sigmaC, ndata)
    B = np.hstack([B, C])
    X = np.hstack([A, B])
    # Add bias term
    X = np.vstack([X, np.ones(X.shape[1])])

    # Targets
    T_A, T_B = np.ones(A.shape[1]), -1 * np.ones(B.shape[1])
    T = np.hstack([T_A, T_B])

    # Wji = weights from input layer i to hidden layer j
    # Wkj = weights from hidden layer j to output layer k
    # Wkj's "+ 1" accounts for the weight on the hidden layer's bias unit
    Wji = _3_1_2_single_layer_perceptron.initializeWeights(
        X.shape[0], n_hidden)
    Wkj = _3_1_2_single_layer_perceptron.initializeWeights(n_hidden + 1, 1)

    losses = []

    plt.plot(A[0], A[1], "ro", B[0], B[1], "bo")
    plt.show()

    for epoch in range(100):

        #         +-------+                +-------+
        #  X(3x1)+>Affine1+--->Hj_in(4x1)+->Sigmoid+->Hj_out(4x1)
        #         +-------+                +-------+
        #                                  +---------+
        #          Wji(4x3)                |D_sigmoid+->Hj_phid(4x1)
        #                                  +---------+

        # forward pass
        # Hj_in = input to layer J
        # Hj_out = output at layer j
        # Yk= output at layer k
        Hj_in = _3_1_2_single_layer_perceptron.forwardPass(Wji, X)
        Hj_bias = np.ones(Hj_in.shape[1])
        Hj_out = np.vstack([vsigmoid(Hj_in), Hj_bias])
        Hj_phid = np.vstack([vd_sigmoid(Hj_in), Hj_bias])

        #         +-------+                +-------+
        #  Hj_out+>Affine2+--->Yk_in(1x1)+->Sigmoid+->Yk_out(1x1)
        #  Biased +-------+                +-------+
        #  (5x1)                           +---------+
        #          Wkj(1x5)                |D_sigmoid+->Yk_phid(1x1)
        #                                  +---------+

        Yk_in = _3_1_2_single_layer_perceptron.forwardPass(Wkj, Hj_out)
        Yk_out = vsigmoid(Yk_in)
        Yk_phid = vd_sigmoid(Yk_in)

        classification_percent = np.sum(np.sign(Yk_out) == T) / len(T)

        error = _3_1_2_single_layer_perceptron.error(T, Yk_out)
        losses.append(error)

        print(
            f"Epoch: {epoch} Error: {error} Percentage: {classification_percent}"
        )

        d_k = dFinal(T, Yk_out, Yk_phid)
        DELTA_Wkj = DELTA(eta, d_k, Hj_out)

        d_j = dHidden(d_k, Wkj, Hj_phid)
        DELTA_Wji = DELTA(eta, d_j, X)[:-1, :]

        Wji = Wji + DELTA_Wji
        Wkj = Wkj + DELTA_Wkj

        print(max(np.max(np.abs(DELTA_Wji)), np.max(np.abs(DELTA_Wkj))))

        if max(np.max(np.abs(DELTA_Wji)), np.max(np.abs(DELTA_Wkj))) < 1e-6:
            break

    return losses
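

# dFinal and dHidden compute the two backprop error signals used above.
# A minimal sketch of both, matching how they are called in this file;
# the _sketch_ names are illustrative, not the real helpers:

import numpy as np


def _sketch_dFinal(T, Yk_out, Yk_phid):
    # Output-layer delta: (y - t), scaled elementwise by phi'(Yk_in)
    return (Yk_out - T) * Yk_phid


def _sketch_dHidden(d_k, Wkj, Hj_phid):
    # Hidden-layer delta: propagate d_k back through Wkj, then scale by
    # phi'(Hj_in). The bias row is kept here; the caller trims it with
    # the [:-1, :] slice on DELTA_Wji.
    return np.matmul(np.transpose(Wkj), d_k) * Hj_phid

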
def runNN():
    eta = 0.1
    ndata = 100

    vsigmoid = np.vectorize(_3_2_1_two_layer_network.sigmoid)
    vd_sigmoid = np.vectorize(_3_2_1_two_layer_network.d_sigmoid)

    #define nodes
    n_hidden = 3
    n_first = 8
    n_last = n_first

    # Training Data sets A
    A = generate_Data(n_first)
    print(A.shape[0])
    X = np.vstack([A, np.ones(A.shape[1])])

    # Targets equal the inputs: the network must reproduce A (auto-encoder)

    #random weights
    Wji = _3_1_2_single_layer_perceptron.initializeWeights(
        X.shape[0], n_hidden)
    Wkj = _3_1_2_single_layer_perceptron.initializeWeights(
        n_hidden + 1, n_last)

    losses = []

    for epoch in range(5000):

        Hj_in = _3_1_2_single_layer_perceptron.forwardPass(Wji, X)
        Hj_bias = np.ones(Hj_in.shape[1])
        Hj_out = np.vstack([vsigmoid(Hj_in), Hj_bias])
        Hj_phid = np.vstack([vd_sigmoid(Hj_in), Hj_bias])

        Yk_in = _3_1_2_single_layer_perceptron.forwardPass(Wkj, Hj_out)
        Yk_out = vsigmoid(Yk_in)
        Yk_phid = vd_sigmoid(Yk_in)

        classification_percent = np.sum(np.sign(Yk_out) == A) / A.size  # targets T are the inputs A

        error = _3_1_2_single_layer_perceptron.error(A, Yk_out)
        losses.append(error)

        print(
            f"Epoch: {epoch} Error: {error}  Percentage: {classification_percent}"
        )

        d_k = _3_2_1_two_layer_network.dFinal(A, Yk_out, Yk_phid)
        DELTA_Wkj = _3_2_1_two_layer_network.DELTA(eta, d_k, Hj_out)

        d_j = _3_2_1_two_layer_network.dHidden(d_k, Wkj, Hj_phid)
        DELTA_Wji = _3_2_1_two_layer_network.DELTA(eta, d_j, X)[:-1, :]

        Wji = Wji + DELTA_Wji
        Wkj = Wkj + DELTA_Wkj

        print(max(np.max(np.abs(DELTA_Wji)), np.max(np.abs(DELTA_Wkj))))

        if max(np.max(np.abs(DELTA_Wji)), np.max(np.abs(DELTA_Wkj))) < 1e-6:
            break

        # The hidden layer learns an internal code (a binary-like encoding).
        # -> with only 2 hidden nodes this should be impossible (visible at Yk_out).
        # With enough steps (50k is enough) the error gets close to 0.
        # The network "encodes" the data in the magnitude of Hj_out and in the
        # very large weights of Wkj; Hj_out is roughly 0.3, 0.5 or 1 (or their negations).

    # print(Wji)
    print(np.sign(np.transpose(Hj_out[:-1])))
    print(np.all(np.sign(Yk_out) == A))

    for i in range(8):
        print(
            f"{np.transpose(A)[i]} & {np.sign(np.transpose(Hj_out[:-1])[i])}")

    print(np.sign(Wji))

    return losses
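

# generate_Data builds the 8-3-8 encoder patterns: each column is all -1
# except for a single +1 (one-hot in +-1 coding), and the patterns double
# as their own targets. A minimal sketch under that assumption;
# _sketch_generate_Data is an illustrative name, not the real helper:

import numpy as np


def _sketch_generate_Data(n):
    # n x n matrix with +1 on the diagonal and -1 elsewhere;
    # column i is input pattern i
    return 2.0 * np.eye(n) - np.ones((n, n))

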
def main():

    eta = 0.001
    ndata = 100
    subsampleCase = 3  # currently unused; all four subsample cases are built below
    part = 2  # only part == 2 trains and builds the losses returned at the end

    plt.ion()

    # 3.1.3 first part (Overlapping clouds)

    if part == 1:
        mA, sigmaA = [1.0, 0.3], 0.2
        mB, sigmaB = [0.5, 0.0], 0.3

        A = _3_1_1_lin_seperable_data.generateData(2, mA, sigmaA, ndata)
        B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)

        plt.plot(A[0],A[1],"bo")
        plt.plot(B[0],B[1],"ro")
        plt.show()


    # 3.1.3 second part (split class A into 2 parts)
    elif part == 2:
        #Training Data sets A and B
        mA, sigmaA = [1.0, 0.3], 0.2
        mB, sigmaB = [0.0, -0.1], 0.3

        A = generateNonlinearData(2, mA, sigmaA, ndata)
        B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)

        #plt.plot(A[0],A[1],"go")
        #plt.plot(B[0],B[1],"mo")
        #plt.show()


        # Subselection of the training data (all four cases are built below);
        # samples are selected by column so that (x, y) pairs stay intact
        # subsampleCase 0: random 75% of each class
        A_0 = A[:, np.random.choice(A.shape[1], int(round(ndata * .75)), replace=False)]
        B_0 = B[:, np.random.choice(B.shape[1], int(round(ndata * .75)), replace=False)]
        # subsampleCase 1: random 50% of A, all of B
        A_1 = A[:, np.random.choice(A.shape[1], int(round(ndata * .50)), replace=False)]
        B_1 = B
        # subsampleCase 2: all of A, random 50% of B
        A_2 = A
        B_2 = B[:, np.random.choice(B.shape[1], int(round(ndata * .50)), replace=False)]
        # subsampleCase 3: 80% of the A cluster with x < 0, 20% of the cluster with x > 0

        # TODO: select the clusters by the sign of A[0, :] instead of assuming
        # a fixed 50/50 column split
        # A_g: the cluster with A[0, :] > 0 (here assumed to be the first 50 columns)
        A_g = A[:, :50]
        A_l = A[:, 50:]


        A_l_1 = A_l[:, np.random.choice(A_l.shape[1], int(round(A_l.shape[1] * 0.80)), replace=False)] #only take 80%
        A_g_1 = A_g[:, np.random.choice(A_g.shape[1], int(round(A_g.shape[1] * 0.20)), replace=False)] #only take 20%


        A_3 = np.hstack([A_l_1,A_g_1])
        B_3 = B



        X_0 = np.hstack([A_0, B_0])
        T_A_0, T_B_0 = np.ones(A_0.shape[1]), -1 * np.ones(B_0.shape[1])

        X_1 = np.hstack([A_1, B_1])
        T_A_1, T_B_1 = np.ones(A_1.shape[1]), -1 * np.ones(B_1.shape[1])

        X_2 = np.hstack([A_2, B_2])
        T_A_2, T_B_2 = np.ones(A_2.shape[1]), -1 * np.ones(B_2.shape[1])

        X_3 = np.hstack([A_3, B_3])
        T_A_3, T_B_3 = np.ones(A_3.shape[1]), -1 * np.ones(B_3.shape[1])




        # Add bias term
        X_0 = np.vstack([X_0, np.ones(X_0.shape[1])])
        X_1 = np.vstack([X_1, np.ones(X_1.shape[1])])
        X_2 = np.vstack([X_2, np.ones(X_2.shape[1])])
        X_3 = np.vstack([X_3, np.ones(X_3.shape[1])])

        # Targets
        T_0 = np.hstack([T_A_0, T_B_0])
        T_1 = np.hstack([T_A_1, T_B_1])
        T_2 = np.hstack([T_A_2, T_B_2])
        T_3 = np.hstack([T_A_3, T_B_3])


        W_0 = _3_1_2_single_layer_perceptron.initializeWeights(X_0.shape[0], 1)
        W_1 = _3_1_2_single_layer_perceptron.initializeWeights(X_1.shape[0], 1)
        W_2 = _3_1_2_single_layer_perceptron.initializeWeights(X_2.shape[0], 1)
        W_3 = _3_1_2_single_layer_perceptron.initializeWeights(X_3.shape[0], 1)
        # print (W.shape)

        losses_0 = []
        losses_1 = []
        losses_2 = []
        losses_3 = []

        start = time.time()

        #train in batch mode
        for i in range(2):
           for _ in range(ndata):

               Y_0 = _3_1_2_single_layer_perceptron.forwardPass(W_0, X_0)
               Y_1 = _3_1_2_single_layer_perceptron.forwardPass(W_1, X_1)
               Y_2 = _3_1_2_single_layer_perceptron.forwardPass(W_2, X_2)
               Y_3 = _3_1_2_single_layer_perceptron.forwardPass(W_3, X_3)

               error_0 = _3_1_2_single_layer_perceptron.error(T_0,Y_0)
               error_1 = _3_1_2_single_layer_perceptron.error(T_1,Y_1)
               error_2 = _3_1_2_single_layer_perceptron.error(T_2,Y_2)
               error_3 = _3_1_2_single_layer_perceptron.error(T_3,Y_3)



               #print("part1.error: ", _, " ", error)
               losses_0.append(error_0)
               losses_1.append(error_1)
               losses_2.append(error_2)
               losses_3.append(error_3)

               dW_0 = -eta * np.matmul((Y_0 - T_0), np.transpose(X_0))
               dW_1 = -eta * np.matmul((Y_1 - T_1), np.transpose(X_1))
               dW_2 = -eta * np.matmul((Y_2 - T_2), np.transpose(X_2))
               dW_3 = -eta * np.matmul((Y_3 - T_3), np.transpose(X_3))

               W_0 = W_0 + dW_0
               W_1 = W_1 + dW_1
               W_2 = W_2 + dW_2
               W_3 = W_3 + dW_3


               #
               def line(x, normal):
                   # 0 = w0x + w1y + w2
                   # y = (-w0x - w2) / w1
                   return (-normal[:, 0] * x - normal[:, 2]) / normal[:, 1]


               X = np.hstack([X_0, X_1, X_2, X_3])

               maxX = np.max(X[0]) + 0.5
               minX = np.min(X[0]) - 0.5
               maxY = np.max(X[1]) + 0.5
               minY = np.min(X[1]) - 0.5

               __x = np.arange(minX, maxX, 0.05)


               #        plt.plot(x_1, line(x_1, W_1))
               #       plt.plot(x_2, line(x_2, W_2))
               #      plt.plot(x_3, line(x_3, W_3))

               def plot():
                   plt.clf()
                   plt.ylim(minY, maxY)
                   plt.xlim(minX, maxX)
                   plt.plot(A[0], A[1], "ro", B[0], B[1], "bo")
                   plt.plot(__x, _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_0), label="25 each")
                   plt.plot(__x, _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_1), label="50 A")
                   plt.plot(__x, _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_2), label="50 B")
                   plt.plot(__x, _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_3), label="split A")
                   plt.title("Grid")
                   plt.xlabel("X-Coord")
                   plt.ylabel("Y-Coord")
                   plt.legend()
                   plt.show()
                   plt.pause(0.000001)


               plot()

        elapsed = time.time() - start

        print(f"\nElapsed: {elapsed}")

        plt.ioff()
        plot()

    return losses_0, losses_1, losses_2, losses_3
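

# generateNonlinearData produces class A as two Gaussian clouds mirrored
# around x = 0, which is why the subselection above can split A into its two
# clusters by column range. A minimal sketch under that assumption (first
# half of the columns on the x > 0 side, matching the A_g = A[:, :50] slice);
# _sketch_generateNonlinearData is an illustrative name, not the real helper:

import numpy as np


def _sketch_generateNonlinearData(dim, mean, sigma, n):
    # dim is assumed to be 2; columns 0..n//2-1 are centered at
    # (+mean[0], mean[1]) and the rest at (-mean[0], mean[1])
    half = n // 2
    x = np.hstack([np.random.randn(half) * sigma + mean[0],
                   np.random.randn(n - half) * sigma - mean[0]])
    y = np.random.randn(n) * sigma + mean[1]
    return np.vstack([x, y])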