for j in xrange(60000):

    # Forward pass: the inputs go through syn0 into the hidden layer (l1),
    # then through syn1 into the output layer (l2).
    l0 = X
    l1 = l0.dotProduct(syn0).nonlin()
    l2 = l1.dotProduct(syn1).nonlin()

    # Residual between the expected outputs and what the network produced.
    l2_error = y - l2

    # Report the mean absolute residual once every 10000 iterations.
    if not j % 10000:
        print("Error:" + str(l2_error.abs().mean()))

    # Output-layer delta: the residual scaled by nonlin(True) of the output,
    # so outputs the network is already confident about move less.
    l2_delta = l2_error * l2.nonlin(True)

    # Push the output delta back through syn1 to get each hidden unit's share
    # of the error. This has to happen before syn1 is modified below.
    l1_error = l2_delta.dotProduct(syn1.transpose())

    # Hidden-layer delta, scaled the same way as the output delta.
    l1_delta = l1_error * l1.nonlin(True)

    # Adjust both weight matrices in place using this iteration's deltas.
    syn1 += l1.transpose().dotProduct(l2_delta)
    syn0 += l0.transpose().dotProduct(l1_delta)

# Run one extra sample through the trained network and show the result.
p = Matrix([Vector([1.0, 0.0, 1.0])])
print(p.dotProduct(syn0).nonlin().dotProduct(syn1).nonlin())
# input data: four samples of three values each (the third value is always 1)

X = Matrix([
    Vector([0., 0., 1.]),
    Vector([0., 1., 1.]),
    Vector([1., 0., 1.]),
    Vector([1., 1., 1.]),
])

# expected output for each sample, turned into a column with transpose()
y = Matrix([Vector([0., 0., 1., 1.])]).transpose()

# starting weights: one per input value, rescaled via 2 * random - 1
syn0 = 2 * Matrix.random(3, 1) - 1
print(syn0)

# NOTE: the loop variable used to be called `iter`, which rebound the builtin
# iter() to an int for the remainder of the script. The counter itself is
# never read inside the loop, so a neutral name is used instead.
for epoch in xrange(10000):
    # forward propagation
    l0 = X
    l1 = (l0.dotProduct(syn0)).nonlin()

    # error eval: how far the output is from the expected values
    l1_error = y - l1

    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * l1.nonlin(True)

    # weight update
    syn0 = syn0 + (l0.transpose()).dotProduct(l1_delta)

print("output after training")
print((X.dotProduct(syn0)).nonlin())
# Expected outputs (0, 1, 1, 0), one per training sample, as a column.
y = Matrix([Vector([0.0, 1.0, 1.0, 0.0])]).transpose()

# randomly initialize our weights with mean 0 (entry + 1 for bias unit)
syn0 = 2 * Matrix.random(4, 4) - 1
syn1 = 2 * Matrix.random(5, 1) - 1

# set the alpha: the factor every weight update is multiplied by
alpha = 10

for j in xrange(1000):

    # Feed forward through layers 0, 1, and 2
    # NOTE(review): Xones is defined outside this section; presumably it is X
    # with a leading column of ones for the bias unit - confirm.
    l0 = Xones
    l1 = l0.dotProduct(syn0).nonlin()
    # Prepend a row of ones to the transposed hidden activations and transpose
    # back, i.e. give l1 a leading column of ones (the hidden bias unit).
    l1 = Matrix(Matrix.ones(1, l0.getRowLen())._matrix + l1.transpose()._matrix).transpose()
    l2 = l1.dotProduct(syn1).nonlin()

    # how much did we miss the target value?
    l2_error = y - l2

    if (j % 100) == 0:
        print("Error:" + str(l2_error.abs().mean()))

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l2_delta = l2_error * l2.nonlin(True)

    # how much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = l2_delta.dotProduct(syn1.transpose())

    # in what direction is the target l1?
    # were we really sure? if so, don't change too much.
    l1_delta = l1_error * l1.nonlin(True)

    # The first column of l1 is the constant bias unit added above. It has no
    # incoming weights in syn0, so its delta is dropped to make the update
    # below line up with syn0's columns.
    l1_delta = Matrix(l1_delta.transpose()._matrix[1:]).transpose()

    # Previously the loop stopped after computing l1_error, so the weights
    # never changed and alpha was unused. Apply the updates, scaled by alpha.
    syn1 += alpha * l1.transpose().dotProduct(l2_delta)
    syn0 += alpha * l0.transpose().dotProduct(l1_delta)
# randomly initialize our weights with mean 0 (entry + 1 for bias unit)
# Three weight matrices: syn0 feeds l1, syn1 feeds l2, syn2 feeds the output l3.
# syn1 and syn2 have 5 rows because a column of ones is added to l1 and l2
# before they are multiplied (see l1tmp / l2tmp below).
syn0 = 2 * Matrix.random(4, 4) - 1
syn1 = 2 * Matrix.random(5, 4) - 1
syn2 = 2 * Matrix.random(5, 1) - 1

# set the alpha
# NOTE(review): alpha is not read anywhere in the lines visible here;
# presumably it scales the weight updates further down - confirm.
alpha = 0.07

for j in xrange(10000):

    # Feed forward through layers 0, 1, 2 and 3
    # NOTE(review): Xones is defined outside this section; presumably it is X
    # with a leading column of ones for the bias unit - confirm.
    l0 = Xones
    l1 = l0.dotProduct(syn0).nonlin()
    # l1tmp: l1 with a row of ones prepended to its transpose, then transposed
    # back, i.e. l1 plus a leading column of ones. l1 itself is left unchanged.
    l1tmp = Matrix(Matrix.ones(1, l0.getRowLen())._matrix + l1.transpose()._matrix).transpose()
    l2 = l1tmp.dotProduct(syn1).nonlin()
    # l2tmp: same construction for l2, with the ones row sized from l2.
    l2tmp = Matrix(Matrix.ones(1, l2.getRowLen())._matrix + l2.transpose()._matrix).transpose()
    l3 = l2tmp.dotProduct(syn2).nonlin()

    # how much did we miss the target value?
    l3_error = y - l3

    # Print the mean absolute error every 100th iteration.
    if (j % 100) == 0:
        print "Error:" + str(l3_error.abs().mean())

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l3_delta = l3_error * l3.nonlin(True)

    # how much did each l2 value contribute to the l3 error (according to the weights)?
    # Because syn2 has 5 rows, this result also carries a column for the ones
    # unit that was added in l2tmp.
    l2_error = l3_delta.dotProduct(syn2.transpose())