# Build the actor and the critic from the module-level configuration constants.
a = Actor(LAMBDA, ALPHA_ACTOR, GAMMA, EPSILON)
c = create_critic(LAMBDA, ALPHA_CRITIC, GAMMA, USE_TABLE, state_size, LAYERS,
                  ADAPT_END_LAYERS, UPDATE_WHOLE_SEQUENCE)

# Print a banner describing the selected scenario.
# NOTE(review): the else-branch only *prints* that scenario 0 is being run —
# SCENARIO itself is not reassigned here; confirm run_ai actually falls back
# to scenario 0 elsewhere, otherwise the banner is misleading.
if SCENARIO in SCENARIO_DESCRIPTIONS:
    print(
        f'\nScenario: {SCENARIO}\nDescription: {SCENARIO_DESCRIPTIONS[SCENARIO]}\n'
    )
else:
    print(
        f'\nUndefined scenario\n\nRunning scenario: {0}\nDescription: {SCENARIO_DESCRIPTIONS[0]}\n'
    )

# Training phase: run for EPOCHS iterations with the actor's configured epsilon.
print('Training')
results = run_ai(BOARD_TYPE, BOARD_SIZE, EPOCHS, a, c, EMPTY_NODES_POS)

# Zero the actor's epsilon so every evaluation run below acts greedily
# (the following print labels this "greedy behaviour").
a.epsilon = 0

if ITERATIONS_TEST:
    # Greedy evaluation; the three trailing False flags presumably disable
    # training/visualisation options — verify against run_ai's signature.
    print('\nTesting policy with greedy behaviour')
    results += run_ai(BOARD_TYPE, BOARD_SIZE, ITERATIONS_TEST, a, c,
                      EMPTY_NODES_POS, False, False, False)

if ITERATIONS_SHOW_END_STATE:
    # Greedy evaluation that renders only the final move of each episode,
    # with DEBUG and DELAY_END_STATE passed through to run_ai.
    print('\nTesting policy and showing only last move')
    results += run_ai(BOARD_TYPE, BOARD_SIZE, ITERATIONS_SHOW_END_STATE, a, c,
                      EMPTY_NODES_POS, False, False, True, DEBUG,
                      DELAY_END_STATE)

if ITERATIONS_SHOW_ALL_ACTIONS:
    # Greedy evaluation that renders every move. NOTE: this run_ai call is
    # continued on the following lines of the file (outside this chunk).
    print('\nTesting policy and showing every move')
    results += run_ai(BOARD_TYPE, BOARD_SIZE, ITERATIONS_SHOW_ALL_ACTIONS,