# Smoke-test script for the py4j bridge: connect to the already-running JVM,
# start a game, and print the first cell of the returned board state.
from py4j.java_gateway import JavaGateway

gateway = JavaGateway()
# NOTE(review): innitGame/getState are invoked directly on the gateway object.
# py4j normally exposes server-side methods via gateway.entry_point — confirm
# the gateway is configured to auto-forward to the entry point (the sic
# spelling "innitGame" must match the Java method name).
gateway.innitGame()
state = gateway.getState()
print(state[0])
def main():
    """Train a DQN-style agent against a py4j-backed board game.

    Relies on module-level globals defined elsewhere in this file:
    hyperparameters (TARGET_UPDATE_FREQ, INITIAL_RANDOM_ACTION,
    REPLAY_MEMORY_SIZE, NUM_EPISODES, MAX_ITERATIONS, RANDOM_ACTION_DECAY,
    ACTIONS_DIM, MINIBATCH_SIZE) and helpers (ReplayBuffer, get_model,
    get_q, update_action), plus a running JVM reachable through py4j.
    """
    # NOTE(review): decremented at the bottom of the loop but never read or
    # reset — the target-network sync it was meant to drive is disabled.
    steps_until_reset = TARGET_UPDATE_FREQ
    random_action_probability = INITIAL_RANDOM_ACTION

    # Initialize replay memory D to capacity N
    replay = ReplayBuffer(REPLAY_MEMORY_SIZE)
    # Initialize action-value model with random weights. A separate target
    # model was deliberately disabled; update_action below receives the
    # action model in both roles.
    action_model = get_model()

    env = JavaGateway()
    jvm = env.jvm

    for episode in range(NUM_EPISODES):
        playerNumber = env.innitGame()
        jObservation = env.getState()
        valueSum = 0
        wasNotBadMove = True

        # Observation layout: [current player number, 9 board cells].
        observation = [1]
        for idx in range(9):
            observation.append(jObservation[idx])

        done = False
        reward = 0
        for iteration in range(MAX_ITERATIONS):
            # Epsilon-greedy exploration with multiplicative decay, floored
            # at 0.1.
            random_action_probability *= RANDOM_ACTION_DECAY
            random_action_probability = max(random_action_probability, 0.1)
            old_observation = observation

            if np.random.random() < random_action_probability:
                action = np.random.choice(range(ACTIONS_DIM))
                # Late in training, let a human play as player 2.
                # NOTE(review): this branch tests `episode >= 10000` while the
                # greedy branch tests `episode > 10000`; the inconsistency is
                # preserved as-is (only episode 10000 differs).
                if episode >= 10000 and playerNumber == 2:
                    print(old_observation)
                    print(valueSum)
                    action = np.int64(input("Space?"))
            else:
                q_values = get_q(action_model, observation)
                action = np.argmax(q_values)
                if episode > 10000 and playerNumber == 2:
                    print(old_observation)
                    print(valueSum)
                    action = np.int64(input("Space?"))

            # The Java side expects [playerNumber, action] as an ArrayList.
            l = jvm.java.util.ArrayList()
            l.append(playerNumber)
            l.append(action.item())
            reward = env.step(l)
            valueSum += reward

            # reward == -2 flags an illegal move; only a legal move passes
            # the turn to the other player.
            wasNotBadMove = reward != -2
            if wasNotBadMove:
                playerNumber = 2 if playerNumber == 1 else 1

            iObservation = env.getState()
            observation = [playerNumber]
            for idx in range(9):
                observation.append(iObservation[idx])

            done = env.isDone()
            if done:
                print(observation)
                print(valueSum)
                # Terminal transition: next state is None.
                replay.add(old_observation, action, reward, None)
                if reward == 0:
                    print("good game")
                if reward == 5:
                    # Also store the losing perspective of the winning move
                    # with a large negative reward.
                    # BUG FIX: copy before mutating — old_observation was
                    # stored in the replay buffer just above, and the former
                    # `modOb = old_observation` aliased that stored list, so
                    # `modOb[0] = playerNumber` corrupted the saved entry.
                    modOb = list(old_observation)
                    modOb[0] = playerNumber
                    replay.add(modOb, action, -50, None)
                break

            replay.add(old_observation, action, reward, observation)

            if replay.size() >= MINIBATCH_SIZE:
                sample_transitions = replay.sample(MINIBATCH_SIZE)
                # Target model disabled: the action model serves both roles.
                update_action(action_model, action_model, sample_transitions)
            steps_until_reset -= 1