nn.ReLU(), nn.Linear(lay2, outp), nn.Sigmoid()) #player2 model2 = nn.Sequential(nn.Linear(ins, lay1), nn.ReLU(), nn.Linear(lay1, lay2), nn.ReLU(), nn.Linear(lay2, outp), nn.Sigmoid()) #model.load_state_dict(basemodel) loss_fn = nn.MSELoss() op = optim.Adam(model.parameters(), lr=.001) loss_fn2 = nn.MSELoss() op2 = optim.Adam(model2.parameters(), lr=.001) epochs = 5000 eps = 1 verbose = False game = TicTacToe() print(game.av_moves()) flag2cpu = True gamma = .8 losses = [] losses2 = [] framest = [] tw = 0 tt = 0 tl = 0 tnm = 0 for i in range(2 * epochs): if epochs == i: flag2cpu = False #epsilon decaying factor eps = .5 + .5 * np.tanh(-10 * (i / epochs) + 2) #print(eps)