Example no. 1
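    # Instantiate the actor and the critic with the configured hyperparameters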
    a = Actor(LAMBDA, ALPHA_ACTOR, GAMMA, EPSILON)
    c = create_critic(LAMBDA, ALPHA_CRITIC, GAMMA, USE_TABLE, state_size,
                      LAYERS, ADAPT_END_LAYERS, UPDATE_WHOLE_SEQUENCE)

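    # Report the selected scenario, falling back to scenario 0 if it is undefined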
    if SCENARIO in SCENARIO_DESCRIPTIONS:
        print(
            f'\nScenario: {SCENARIO}\nDescription: {SCENARIO_DESCRIPTIONS[SCENARIO]}\n'
        )
    else:
        print(
            f'\nUndefined scenario\n\nRunning scenario: {0}\nDescription: {SCENARIO_DESCRIPTIONS[0]}\n'
        )

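    # Train the agent on the configured board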
    print('Training')
    results = run_ai(BOARD_TYPE, BOARD_SIZE, EPOCHS, a, c, EMPTY_NODES_POS)
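    # Disable exploration so the test runs below follow the greedy policy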
    a.epsilon = 0

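    # Optional test runs: plain greedy evaluation, then runs that show the final state or every move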
    if ITERATIONS_TEST:
        print('\nTesting policy with greedy behaviour')
        results += run_ai(BOARD_TYPE, BOARD_SIZE, ITERATIONS_TEST, a, c,
                          EMPTY_NODES_POS, False, False, False)

    if ITERATIONS_SHOW_END_STATE:
        print('\nTesting policy and showing only last move')
        results += run_ai(BOARD_TYPE, BOARD_SIZE, ITERATIONS_SHOW_END_STATE, a,
                          c, EMPTY_NODES_POS, False, False, True, DEBUG,
                          DELAY_END_STATE)

    if ITERATIONS_SHOW_ALL_ACTIONS:
        print('\nTesting policy and showing every move')
        results += run_ai(BOARD_TYPE, BOARD_SIZE, ITERATIONS_SHOW_ALL_ACTIONS,