def test_GetParamsThenConstructor(self):
    """Parameters exported by one network can seed another via the constructor.

    Both networks are initialized on the same zero-filled (8, 16) / (8, 4)
    arrays; the first layer's weights and biases must then be equal once
    both sides are cast to float32.
    """
    inputs = numpy.zeros((8, 16))
    targets = numpy.zeros((8, 4))

    source_net = MLPR(layers=[L("Linear")], n_iter=1)
    source_net._initialize(inputs, targets)
    exported = source_net.get_parameters()
    print(len(exported))

    # The second network receives the exported parameters at construction time.
    seeded_net = MLPR(layers=[L("Linear")], n_iter=1, parameters=exported)
    seeded_net._initialize(inputs, targets)
    reloaded = seeded_net.get_parameters()
    print(len(reloaded))

    before, after = exported[0], reloaded[0]
    weights_match = (
        before.weights.astype('float32') == after.weights.astype('float32'))
    assert_true(weights_match.all())
    biases_match = (
        before.biases.astype('float32') == after.biases.astype('float32'))
    assert_true(biases_match.all())
def test_GetParamsThenConstructor(self):
    """Round-trip layer parameters through the `parameters=` constructor argument.

    A first network is initialized and its parameters read back; a second
    network is built from them and initialized on the same data.  The test
    passes when layer 0 of both networks holds identical float32 weights
    and biases.
    """
    def as_float32(array):
        # Both sides of each comparison are cast the same way.
        return array.astype('float32')

    first = MLPR(layers=[L("Linear")], n_iter=1)
    x_zeros, y_zeros = numpy.zeros((8, 16)), numpy.zeros((8, 4))
    first._initialize(x_zeros, y_zeros)
    first_params = first.get_parameters()
    print(len(first_params))

    second = MLPR(layers=[L("Linear")], n_iter=1, parameters=first_params)
    second._initialize(x_zeros, y_zeros)
    second_params = second.get_parameters()
    print(len(second_params))

    assert_true(
        (as_float32(first_params[0].weights) ==
         as_float32(second_params[0].weights)).all())
    assert_true(
        (as_float32(first_params[0].biases) ==
         as_float32(second_params[0].biases)).all())
def test_SetLayerParamsDict(self):
    """Parameters passed as a dict keyed by layer name land on that layer.

    The second layer is named 'abcd'; random (32, 4) weights and (4,) biases
    are set under that key and must be read back (as float32) from index 1.
    """
    net = MLPR(layers=[L("Sigmoid", units=32), L("Linear", name='abcd')])
    net._initialize(numpy.zeros((8, 16)), numpy.zeros((8, 4)))

    new_weights = numpy.random.uniform(-1.0, +1.0, (32, 4))
    new_biases = numpy.random.uniform(-1.0, +1.0, (4,))
    net.set_parameters({'abcd': (new_weights, new_biases)})

    named_layer = net.get_parameters()[1]
    weights_match = (
        named_layer.weights.astype('float32') == new_weights.astype('float32'))
    assert_true(weights_match.all())
    biases_match = (
        named_layer.biases.astype('float32') == new_biases.astype('float32'))
    assert_true(biases_match.all())
def test_SetLayerParamsList(self):
    """Parameters passed as a positional list are applied to the matching layer.

    A single-layer network receives random (16, 4) weights and (4,) biases
    as a one-element list; reading the parameters back must give the same
    values after casting to float32.
    """
    net = MLPR(layers=[L("Linear")])
    net._initialize(numpy.zeros((8, 16)), numpy.zeros((8, 4)))

    new_weights = numpy.random.uniform(-1.0, +1.0, (16, 4))
    new_biases = numpy.random.uniform(-1.0, +1.0, (4,))
    net.set_parameters([(new_weights, new_biases)])

    only_layer = net.get_parameters()[0]
    weights_match = (
        only_layer.weights.astype('float32') == new_weights.astype('float32'))
    assert_true(weights_match.all())
    biases_match = (
        only_layer.biases.astype('float32') == new_biases.astype('float32'))
    assert_true(biases_match.all())
def test_GetLayerParams(self):
    """`get_parameters()` returns a list of tuples exposing layer, weights, biases.

    For a single Linear layer initialized on (8, 16) inputs and (8, 4)
    outputs, the entry must be named 'output' with (16, 4) weights and
    (4,) biases.
    """
    net = MLPR(layers=[L("Linear")], n_iter=1)
    net._initialize(numpy.zeros((8, 16)), numpy.zeros((8, 4)))

    params = net.get_parameters()
    assert_equals(type(params), list)

    only_layer = params[0]
    assert_true(isinstance(only_layer, tuple))
    assert_equals(only_layer.layer, 'output')
    assert_equals(only_layer.weights.shape, (16, 4))
    assert_equals(only_layer.biases.shape, (4,))
def test_GetLayerParams(self):
    """Check the container type, entry type, name and shapes of read-back parameters.

    The network has one Linear layer and is initialized on zero arrays of
    shape (8, 16) and (8, 4) before its parameters are inspected.
    """
    x_zeros = numpy.zeros((8, 16))
    y_zeros = numpy.zeros((8, 4))
    model = MLPR(layers=[L("Linear")], n_iter=1)
    model._initialize(x_zeros, y_zeros)

    layer_params = model.get_parameters()
    assert_equals(type(layer_params), list)
    assert_true(isinstance(layer_params[0], tuple))

    entry = layer_params[0]
    # The unnamed last layer is expected to be reported as 'output'.
    assert_equals(entry.layer, 'output')
    assert_equals(entry.weights.shape, (16, 4))
    assert_equals(entry.biases.shape, (4, ))
def test_LayerParamsSkipOneWithNone(self):
    """A `None` entry in the parameter list leaves that layer untouched.

    The list passed to `set_parameters` holds `None` for the first layer
    and random (32, 4) weights / (4,) biases for the second; the second
    layer must then report those values.
    """
    nn = MLPR(layers=[L("Sigmoid", units=32), L("Linear", name='abcd')])
    a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, 4))
    nn._initialize(a_in, a_out)

    weights = numpy.random.uniform(-1.0, +1.0, (32, 4))
    biases = numpy.random.uniform(-1.0, +1.0, (4,))
    nn.set_parameters([None, (weights, biases)])

    p = nn.get_parameters()
    # Compare at float32 precision, like the other set/get parameter tests:
    # the random inputs are float64, and an exact float64 comparison fails
    # if the network keeps its parameters in single precision.
    assert_true(
        (p[1].weights.astype('float32') == weights.astype('float32')).all())
    assert_true(
        (p[1].biases.astype('float32') == biases.astype('float32')).all())
def test_SetLayerParamsDict(self):
    """Set one layer's parameters through a `{layer_name: (weights, biases)}` dict.

    Only the layer named 'abcd' (index 1) is addressed; its read-back
    weights and biases must equal the supplied arrays at float32 precision.
    """
    def as_float32(array):
        return array.astype('float32')

    model = MLPR(layers=[L("Sigmoid", units=32), L("Linear", name='abcd')])
    x_zeros, y_zeros = numpy.zeros((8, 16)), numpy.zeros((8, 4))
    model._initialize(x_zeros, y_zeros)

    # Weights are drawn before biases, as in any fixed-seed reproduction.
    w_random = numpy.random.uniform(-1.0, +1.0, (32, 4))
    b_random = numpy.random.uniform(-1.0, +1.0, (4, ))
    model.set_parameters({'abcd': (w_random, b_random)})

    read_back = model.get_parameters()
    assert_true(
        (as_float32(read_back[1].weights) == as_float32(w_random)).all())
    assert_true(
        (as_float32(read_back[1].biases) == as_float32(b_random)).all())
def test_SetLayerParamsList(self):
    """Set parameters through a list with one `(weights, biases)` pair per layer.

    The single Linear layer receives random (16, 4) weights and (4,) biases
    and must return them unchanged at float32 precision.
    """
    def as_float32(array):
        return array.astype('float32')

    model = MLPR(layers=[L("Linear")])
    x_zeros, y_zeros = numpy.zeros((8, 16)), numpy.zeros((8, 4))
    model._initialize(x_zeros, y_zeros)

    w_random = numpy.random.uniform(-1.0, +1.0, (16, 4))
    b_random = numpy.random.uniform(-1.0, +1.0, (4, ))
    model.set_parameters([(w_random, b_random)])

    read_back = model.get_parameters()
    assert_true(
        (as_float32(read_back[0].weights) == as_float32(w_random)).all())
    assert_true(
        (as_float32(read_back[0].biases) == as_float32(b_random)).all())
def main():
    """Train a tic-tac-toe actor/critic pair while a human plays "O".

    Steps, in order:
      1. Fit an actor network on 500 random (board, action-index) pairs.
      2. Fit a critic network on 500 random ([board, action], reward) pairs.
      3. Play 10 games.  Each turn the actor picks a move, the transition is
         stored in a replay buffer, a small random minibatch retrains the
         critic, the target critic is moved toward the critic, and the
         human is prompted for the "O" move.

    Reads moves from standard input and prints prompts/errors to standard
    output.  Returns None.
    """
    # Python 2's input() evaluates what is typed; read raw text on either version.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    def _random_board():
        """Return a 3x3 int board with each cell drawn from {-1, 0, 1}."""
        cells = [random.randint(-1, 1) for _ in range(9)]
        return np.array(cells, dtype=int).reshape(3, 3)

    def _read_cell(prompt, retry_prompt):
        """Prompt until the user types an in-range "x, y" pair; return (x, y)."""
        message = prompt
        while True:
            try:
                # ValueError covers both non-numeric text and a wrong item count.
                row, col = (int(part) for part in read_line(message).split(","))
            except ValueError:
                print ("I'm sorry, but x and y should be numerical.")
                message = prompt
                continue
            if 0 <= row <= 2 and 0 <= col <= 2:
                return row, col
            message = retry_prompt

    def _blend_into_target(target, source, rate):
        """Move `target`'s parameters a fraction `rate` of the way to `source`'s."""
        # get_parameters() entries expose .weights/.biases; set_parameters()
        # takes one (weights, biases) pair per layer.
        blended = [
            (rate * src.weights + (1 - rate) * tgt.weights,
             rate * src.biases + (1 - rate) * tgt.biases)
            for src, tgt in zip(source.get_parameters(),
                                target.get_parameters())
        ]
        target.set_parameters(blended)

    # Initialize the board - the state.  Zero-filled so no cell starts with
    # whatever happened to be in memory (assumes 0 encodes an empty square,
    # since "O" is written as -1 below - TODO confirm).
    state = np.zeros((3, 3), dtype=int)
    win_states = makeWinStates()
    actions = [[0,0], [0,1], [0,2],
               [1,0], [1,1], [1,2],
               [2,0], [2,1], [2,2]]  # also spaces

    gamma = .3  # discount applied to the target critic's prediction
    tau = .02   # soft-update rate; was used below without ever being defined

    # Initializing our 'overkill' neural networks
    # NOTE(review): `layers` is not defined in this function - confirm it
    # exists at module level, otherwise this line raises NameError.
    actor = Regressor(layers, warning=None, weights=None, random_state=None,
                      learning_rule='sgd', learning_rate=0.01,
                      learning_momentum=0.9, regularize=None,
                      weight_decay=None, dropout_rate=None, batch_size=1,
                      n_iter=None, n_stable=10, f_stable=0.001,
                      valid_set=None, valid_size=0.0, loss_type=None,
                      callback=None, debug=False, verbose=None)  # ???

    # Training the actor with a random policy
    trainStates = []
    acts = []
    for _ in range(500):
        trainStates.append(_random_board())
        acts.append(random.randint(0, 8))  # action represented by its index
    actor.fit(trainStates, acts)
    target_mu = actor

    critic = Regressor(
        layers=[Layer("Rectifier", name="layer1", units=11, pieces=2),  # 9 squares, 1 action, 1 bias
                Layer("Rectifier", name="layer2", units=11, pieces=2),
                Layer("Rectifier", name="layer3", units=11, pieces=2),
                Layer("Softmax")],
        learning_rate=0.02)

    # Randomly initialize the critic
    statesAndActs = []
    rewards = []
    for _ in range(500):
        sample_st = _random_board()
        # random action, random reward
        act = random.randint(0, 8)
        rew = random.randint(-1, 1)
        statesAndActs.append([sample_st, act])
        rewards.append(rew)
    critic.fit(statesAndActs, rewards)
    # NOTE(review): target_Q is the same object as critic, so blending one
    # into the other changes nothing until the target gets its own network.
    target_Q = critic

    for episode in range(10):
        reward = 0
        end = False
        R = []  # replay buffer of (state, action, reward, newstate) tuples
        while not end:
            action = actor.predict(state)
            newstate = getNextState(state, action)  # Execute action
            # Observe reward
            # NOTE(review): this reads the reward of the pre-move state.
            reward = getReward(state)
            if reward != 0:  # Game is done
                end = True

            # Replay buffer review.  list.append takes one argument, so the
            # transition is stored as a single tuple.
            R.append((state, action, reward, newstate))
            N = int(math.floor(math.log(len(R))))
            # Sample without replacement and without emptying the buffer.
            minibatch = random.sample(R, N)

            ys = []
            batchStates = []
            for s_0, a_0, r, s_1 in minibatch:
                ys.append(r + gamma * target_Q.predict(s_1))
                # Make new input for retraining - includes state and action
                batchStates.append([s_0, a_0])

            # minimize the loss L = (1/N)*sum(ys[i] - critic.predict(state))^2 - a linear regression
            if batchStates:
                critic.fit(batchStates, ys)

            # update the actor policy somehow -- this is the hard part; test the critic alone first

            # Update the target critic: plain copy during the first episode,
            # soft update afterwards.
            _blend_into_target(target_Q, critic, 1.0 if episode == 0 else tau)

            # Update the target actor
            # How do I write this

            # Set state to the new state.
            state = newstate
            reward = getReward(state)
            if reward != 0:  # Game is done
                end = True

            # We play as "O"
            x, y = _read_cell(
                "Enter the row and column indices of the location at which you intend to draw an 'O.' (Format: x, y): ",
                "Sorry, those indices are invalid. Please input integral indices between 0 and 2 inclusive, in the correct format: ")
            state[x, y] = -1
            reward = getLoss(state)
            if reward != 0:  # Game is done
                end = True
def main():
    """Train a tic-tac-toe critic while a human enters both players' moves.

    Steps, in order:
      1. Fit the critic on 500 random samples, each a row of 10 values
         (the 9 board cells followed by an action index) with a random
         reward in {-1, 0, 1}.
      2. Play 1000 games.  The user types the "X" move, the transition is
         stored in a replay buffer, a small random minibatch retrains the
         critic, the target critic is moved toward the critic, and the user
         types the "O" move.
      3. Print a few placeholder lines where critic testing is meant to go.

    Reads moves from standard input and prints prompts/debug output to
    standard output.  Returns None.
    """
    # Python 2's input() evaluates what is typed; read raw text on either version.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    actions = [[0,0], [0,1], [0,2],
               [1,0], [1,1], [1,2],
               [2,0], [2,1], [2,2]]  # also spaces

    def _read_action(prompt):
        """Prompt until the user types a valid "x, y" cell; return its index in `actions`."""
        while True:
            try:
                # ValueError covers both non-numeric text and a wrong item count.
                row, col = (int(part) for part in read_line(prompt).split(","))
            except ValueError:
                print ("I'm sorry, but x and y should be numerical.")
                continue
            if [row, col] in actions:
                return actions.index([row, col])

    def _blend_into_target(target, source, rate):
        """Move `target`'s parameters a fraction `rate` of the way to `source`'s."""
        # get_parameters() entries expose .weights/.biases; set_parameters()
        # takes one (weights, biases) pair per layer.
        blended = [
            (rate * src.weights + (1 - rate) * tgt.weights,
             rate * src.biases + (1 - rate) * tgt.biases)
            for src, tgt in zip(source.get_parameters(),
                                target.get_parameters())
        ]
        target.set_parameters(blended)

    def _critic_row(board, action):
        """Return the critic input row: 9 row-major cells followed by the action index."""
        return list(np.asarray(board).flatten()) + [action]

    # Initialize the board - the state.  Zero-filled so no cell starts with
    # whatever happened to be in memory (assumes 0 encodes an empty square
    # - TODO confirm against getNextState).
    state = np.zeros((3, 3), dtype=int)
    moveNum = 1

    gamma = .3  # discount applied to the target critic's prediction
    tau = .02   # soft-update rate for the target critic

    critic = Regressor(
        layers=[Layer("Rectifier", name="layer1", units=11, pieces=2),  # 9 squares, 1 action, 1 bias
                Layer("Rectifier", name="layer2", units=11, pieces=2),
                Layer("Rectifier", name="layer3", units=11, pieces=2),
                Layer("Softmax")],
        random_state=1, learning_rate=0.02)

    # Randomly initialize the critic
    statesAndActs = []
    rewards = []
    for sample_index in range(500):
        cells = [random.randint(-1, 1) for _ in range(9)]
        sample_st = np.array(cells, dtype=int).reshape(3, 3)
        if sample_index == 0:
            print(sample_st)
        # random action, random reward
        act = random.randint(0, 8)
        rew = random.randint(-1, 1)
        rewards.append(rew)
        stateAndAct = [sample_st[k // 3, k % 3] for k in range(9)]
        stateAndAct.append(act)
        if sample_index == 0:
            print(stateAndAct)
        statesAndActs.append(stateAndAct)
    print("hi")
    sA = np.array(statesAndActs)
    r = np.array(rewards)
    critic.fit(sA, r)
    print("aloha")
    # NOTE(review): target_Q is the same object as critic, so blending one
    # into the other changes nothing until the target gets its own network.
    target_Q = critic

    # Training
    # NOTE(review): neither the board nor moveNum is reset between games.
    for episode in range(1000):
        reward = 0
        end = False
        R = []  # Replay buffer of (state, action, reward, newstate) tuples
        while not end:
            # We play as both
            action = _read_action(
                "Enter the row and column indices of the location at which you intend to draw an 'X.' (Format: x, y): ")
            newstate = getNextState(state, action, moveNum)  # Execute action
            moveNum = moveNum + 1
            # Observe reward
            reward = getReward(newstate)
            # list.append takes one argument, so store the transition as a tuple.
            R.append((state, action, reward, newstate))
            if reward != 0:  # Game is done
                end = True
                break

            N = int(math.floor(math.log(len(R))))
            # Sample without replacement and without emptying the buffer.
            minibatch = random.sample(R, N)

            ys = []
            batchStates = []
            for s_0, a_0, rew_0, s_1 in minibatch:
                # NOTE(review): the critic was fitted on 10-value rows, but
                # s_1 is a bare board here - confirm what predict() should get.
                ys.append(rew_0 + gamma * target_Q.predict(s_1))
                # Make new input for retraining - includes state and action
                batchStates.append(_critic_row(s_0, a_0))

            # minimize the loss L = (1/N)*sum(ys[i] - critic.predict(state))^2 - a linear regression
            if batchStates:
                critic.fit(np.array(batchStates), np.array(ys))

            # Update the target network manually: plain copy during the
            # first game, soft update afterwards.
            _blend_into_target(target_Q, critic, 1.0 if episode == 0 else tau)

            # Set state to the new state.
            state = newstate

            # "O"
            action = _read_action(
                "Enter the row and column indices of the location at which you intend to draw an 'O.' (Format: x, y): ")
            # The original passed an undefined name `par` here; moveNum is
            # presumably what selects the player - TODO confirm.
            newstate2 = getNextState(state, action, moveNum)
            moveNum = moveNum + 1
            # Score and keep the board that includes O's move.
            reward = getLoss(newstate2)
            R.append((state, action, reward, newstate2))
            state = newstate2
            if reward != 0:  # Game is done
                end = True
                break
        # end While

    # Testing the critic
    for _ in range(10):
        print("hi")
    print("Meow")