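    # Assumed context for the test methods below (not shown in this listing):
    #   import numpy
    #   from nose.tools import assert_true, assert_equals
    #   from sknn.mlp import Regressor as MLPR, Layer as L
    # Each method belongs to a unittest-style test class, hence the `self` argument.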
    def test_GetParamsThenConstructor(self):
        nn1 = MLPR(layers=[L("Linear")], n_iter=1)
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, 4))
        nn1._initialize(a_in, a_out)

        p1 = nn1.get_parameters()
        print(len(p1))
        nn2 = MLPR(layers=[L("Linear")], n_iter=1, parameters=p1)
        nn2._initialize(a_in, a_out)
        p2 = nn2.get_parameters()
        print(len(p2))

        assert_true(
            (p1[0].weights.astype('float32') == p2[0].weights.astype('float32')
             ).all())
        assert_true(
            (p1[0].biases.astype('float32') == p2[0].biases.astype('float32')
             ).all())
    def test_GetLayerParams(self):
        nn = MLPR(layers=[L("Linear")], n_iter=1)
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, 4))
        nn._initialize(a_in, a_out)

        p = nn.get_parameters()
        assert_equals(type(p), list)
        assert_true(isinstance(p[0], tuple))

        assert_equals(p[0].layer, 'output')
        assert_equals(p[0].weights.shape, (16, 4))
        assert_equals(p[0].biases.shape, (4, ))
    def test_LayerParamsSkipOneWithNone(self):
        nn = MLPR(layers=[L("Sigmoid", units=32), L("Linear", name='abcd')])
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, 4))
        nn._initialize(a_in, a_out)

        weights = numpy.random.uniform(-1.0, +1.0, (32, 4))
        biases = numpy.random.uniform(-1.0, +1.0, (4,))
        nn.set_parameters([None, (weights, biases)])

        p = nn.get_parameters()
        assert_true((p[1].weights == weights).all())
        assert_true((p[1].biases == biases).all())
    def test_SetLayerParamsDict(self):
        nn = MLPR(layers=[L("Sigmoid", units=32), L("Linear", name='abcd')])
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, 4))
        nn._initialize(a_in, a_out)

        weights = numpy.random.uniform(-1.0, +1.0, (32, 4))
        biases = numpy.random.uniform(-1.0, +1.0, (4, ))
        nn.set_parameters({'abcd': (weights, biases)})

        p = nn.get_parameters()
        assert_true((
            p[1].weights.astype('float32') == weights.astype('float32')).all())
        assert_true(
            (p[1].biases.astype('float32') == biases.astype('float32')).all())
    def test_SetLayerParamsList(self):
        nn = MLPR(layers=[L("Linear")])
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, 4))
        nn._initialize(a_in, a_out)

        weights = numpy.random.uniform(-1.0, +1.0, (16, 4))
        biases = numpy.random.uniform(-1.0, +1.0, (4, ))
        nn.set_parameters([(weights, biases)])

        p = nn.get_parameters()
        assert_true((
            p[0].weights.astype('float32') == weights.astype('float32')).all())
        assert_true(
            (p[0].biases.astype('float32') == biases.astype('float32')).all())
Example #10
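# Assumed imports for the reinforcement-learning example below (not shown in
# the original listing); Regressor and Layer come from scikit-neuralnetwork.
import math
import random
import numpy as np
from sknn.mlp import Regressor, Layer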
def main():
    
    #Initialize the board - the state
    state = np.zeros((3, 3), dtype=int)  # start from an empty board; np.ndarray would leave it uninitialized
    win_states = makeWinStates()
    actions = [[0,0], [0,1], [0,2], [1,0], [1,1], [1,2], [2,0], [2,1], [2,2]] #also spaces
    
    #Variables - might not be necessary
    k = 1
    alpha = 1.0 / k
    gamma = .3
    eps = .1
    tau = .02  # target-network soft-update rate (used below; assumed value, matching the revised version)

    #Initializing our 'overkill' neural networks
    # The original passed an undefined `layers` variable here; the layer spec
    # below is a placeholder so that the call is at least well-formed.
    actor = Regressor(layers=[Layer("Rectifier", units=9),
                              Layer("Linear")],
                      learning_rule='sgd',
                      learning_rate=0.01,
                      learning_momentum=0.9,
                      batch_size=1,
                      n_stable=10,
                      f_stable=0.001,
                      valid_size=0.0,
                      debug=False) #???
    
    #Training the actor with a random policy
    trainStates = []; acts = []
    for i in range(500):
        sample_st = np.zeros((3, 3), dtype=int)
        for j in range(9):
            sample_st[j // 3, j % 3] = random.randint(-1, 1)

        act = random.randint(0, 8) #action represented by its index
        trainStates.append(sample_st.flatten())  # flatten to a 9-feature row for fit()
        acts.append(act)

    actor.fit(np.array(trainStates), np.array(acts))
    
    target_mu = actor
    
    
    critic = Regressor(layers=[Layer("Rectifier", name="layer1", units=11, pieces=2), #9 squares, 1 action, 1 bias
                       Layer("Rectifier", name="layer2", units=11, pieces=2),
                       Layer("Rectifier", name="layer3", units=11, pieces=2),
                       Layer("Softmax")], learning_rate=0.02)
    
    #Randomly initialize the critic
    statesAndActs = []; rewards = []
    for i in range(500):
        sample_st = np.zeros((3, 3), dtype=int)
        for j in range(9):
            sample_st[j // 3, j % 3] = random.randint(-1, 1)

        #random action, random reward
        act = random.randint(0, 8)
        rew = random.randint(-1, 1)

        # critic input = the 9 board squares followed by the action index
        statesAndActs.append(list(sample_st.flatten()) + [act])
        rewards.append(rew)

    critic.fit(np.array(statesAndActs), np.array(rewards))

    target_Q = critic

    
    for episode in range(10):
        reward = 0; end = False; R = []

        while (end != True):

            action = actor.predict(state.reshape(1, -1))  # predict() expects a 2-D (samples, features) array
            newstate = getNextState(state, action) #Execute action

            #Observe reward
            reward = getReward(state)
            if reward != 0: #Game is done
                end = True

            #Replay buffer review
            R.append((state, action, reward, newstate))

            N = int(math.floor(math.log(len(R))))
            R2 = list(R); minibatch = []  # sample from a copy so the buffer itself is untouched
            for m in range(N):
                j = random.randint(0, len(R2) - 1)
                minibatch.append(R2.pop(j))

            ys = []; batchStates = []
            for m in range(N):
                s_1 = minibatch[m][3]; r = minibatch[m][2]
                ys.append(r + gamma*target_Q.predict(s_1))

                #Make new input for retraining - includes state and action
                batchStates.append([minibatch[m][0], minibatch[m][1]])

            #minimize the loss L = (1/N)*sum(ys[i] - critic.predict(state))^2 - a linear regression
            if len(batchStates) != 0:
                critic.fit(batchStates, ys)

            #update the actor policy somehow -- this is the hard part; test the critic alone first

            #Update the target critic (sknn parameters are (weights, biases) named tuples,
            #so the direct array arithmetic below will not broadcast; see the soft_update
            #sketch after this function)
            Q_para = np.array(critic.get_parameters())
            if episode == 0:
                target_Q.set_parameters(Q_para)
            else:
                Qp_para = np.array(target_Q.get_parameters())
                new_para = tau*Q_para + (1-tau)*Qp_para
                target_Q.set_parameters(new_para)

            #Update the target actor

            #How do I write this

            #Set state to the new state.
            state = newstate

            reward = getReward(state)
            if reward != 0: #Game is done
                end = True

            #We play as "O"
            x = -1; y = -1
            while not (x >= 0 and x <= 2 and y >= 0 and y <= 2):
                try:
                    x, y = [int(v) for v in raw_input("Enter the row and column indices of the location at which you intend to draw an 'O.' (Format: x, y):    ").split(',')]
                    while x < 0 or x > 2 or y < 0 or y > 2:
                        x, y = [int(v) for v in raw_input("Sorry, those indices are invalid. Please input integral indices between 0 and 2 inclusive, in the correct format:    ").split(',')]
                except ValueError:
                    print ("I'm sorry, but x and y should be numerical.")

            state[x, y] = -1
            reward = getLoss(state)
            if reward != 0: #Game is done
                end = True
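
# A minimal sketch of the target-network soft update that the training loops in both
# versions of main() intend, assuming sknn's get_parameters() returns named tuples
# with .weights and .biases and set_parameters() accepts a list of (weights, biases)
# pairs, as exercised by the tests earlier in this listing. The name soft_update is
# introduced here purely for illustration.
def soft_update(target_net, source_net, tau):
    blended = []
    for tgt, src in zip(target_net.get_parameters(), source_net.get_parameters()):
        # Polyak averaging: target <- tau * source + (1 - tau) * target
        blended.append((tau * src.weights + (1 - tau) * tgt.weights,
                        tau * src.biases + (1 - tau) * tgt.biases))
    target_net.set_parameters(blended)
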
def main():

    #Initialize the board - the state
    state = np.zeros((3, 3), dtype=int)  # empty board
    actions = [[0,0], [0,1], [0,2], [1,0], [1,1], [1,2], [2,0], [2,1], [2,2]] #also spaces
    moveNum = 1

    #Variables - might not be necessary
    #k = 1
    #alpha = 1/k
    gamma = .3
    #eps = .1
    tau = .02

    critic = Regressor(layers=[Layer("Rectifier", name="layer1", units=11, pieces=2), #9 squares, 1 action, 1 bias
                               Layer("Rectifier", name="layer2", units=11, pieces=2),
                               Layer("Rectifier", name="layer3", units=11, pieces=2),
                               Layer("Softmax")], random_state=1, learning_rate=0.02)
    
    #Randomly initialize the critic
    statesAndActs = []; rewards = []
    for i in range(500):
        sample_st = np.zeros((3, 3), dtype=int)
        for j in range(9):
            sample_st[j // 3, j % 3] = random.randint(-1, 1)
            
        if i == 0:
            print sample_st

        #random action, random reward
        act = random.randint(0,8)
        rew = random.randint(-1,1)
        rewards.append(rew)
        
        stateAndAct = []
        for k in range(9):
            stateAndAct.append(sample_st[int(k/3),k%3])
            
        stateAndAct.append(act)
        if i == 0:
            print stateAndAct
        statesAndActs.append(stateAndAct)
    
    print "hi"
    sA = np.array(statesAndActs); r = np.array(rewards)
    critic.fit(sA,r)
    print "aloha"
        
    target_Q = critic
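    # Note: this aliases the critic, so target_Q and critic are the same object;
    # a separate copy (e.g. copy.deepcopy(critic)) would be needed for the soft
    # update below to maintain a distinct target network.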

    #Training
    for i in range(1000):
        reward = 0; end = False
        R = [] #Replay buffer

        while (end != True):

            #We play as both
            x = -1; y = -1
            success = False
            while success == False:
                try:
                    x, y = [int(v) for v in raw_input("Enter the row and column indices of the location at which you intend to draw an 'X.' (Format: x, y):    ").split(',')]
                    if [x, y] in actions:
                        action = actions.index([x, y])
                        success = True
                except ValueError:
                    print ("I'm sorry, but x and y should be numerical.")
                    
            newstate = getNextState(state, action, moveNum) #Execute action
            moveNum = moveNum + 1

            #Observe reward
            reward = getReward(newstate)
            R.append((state, action, reward, newstate))
            if reward != 0: #Game is done
                end = True
                break

            N = int(math.floor(math.log(len(R))))
            R2 = list(R); minibatch = []  # sample from a copy so the buffer itself is untouched
            for m in range(N):
                j = random.randint(0, len(R2) - 1)
                minibatch.append(R2.pop(j))

            ys = []; batchStates = []
            for m in range(N):
                s_1 = minibatch[m][3]; r = minibatch[m][2]
                ys.append(r + gamma*target_Q.predict(s_1))

                #Make new input for retraining - includes state and action
                batchStates.append([minibatch[m][0], minibatch[m][1]])

            #minimize the loss L = (1/N)*sum(ys[i] - critic.predict(state))^2 - a linear regression
            if len(batchStates) != 0:
                critic.fit(batchStates,ys)

            #Update the target network manually
            Q_para = np.array(critic.get_parameters())
            if i == 0:
                target_Q.set_parameters(Q_para)
            else:
                Qp_para = np.array(target_Q.get_parameters())
                new_para = tau*Q_para + (1-tau)*Qp_para
                target_Q.set_parameters(new_para)
            

            #Set state to the new state.
            state = newstate    

            #"O"
            x = -1; y = -1
            while success == True:
                try:
                    x, y = [int(v) for v in raw_input("Enter the row and column indices of the location at which you intend to draw an 'O.' (Format: x, y):    ").split(',')]
                    if [x, y] in actions:
                        action = actions.index([x, y])
                        success = False
                except ValueError:
                    print ("I'm sorry, but x and y should be numerical.")

            newstate2 = getNextState(state, action, moveNum)  # assuming the undefined 'par' was meant to be moveNum
            moveNum = moveNum + 1
            reward = getLoss(newstate2)
            R.append((state, action, reward, newstate2))
            state = newstate2  # record the "O" move on the board as well
            if reward != 0: #Game is done
                end = True
                break

    #end While
    
    #Testing the critic
    for i in range(10):
        print "hi"
    print "Meow"