def nn_mv():
    global Q_map, P_map, visit_count_map
    global Q_map_next, P_map_next, visit_count_map_next

    t_start = time.time()
    arch.sess.run(arch.session_backup)
    pu.init_tree()
    pu.session_backup()

    if run_net:
        if turn == 0:
            arch.sess.run(arch.nn_prob_move_unit_valid_mvs, feed_dict=ret_d(0))
        else:
            arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs,
                          feed_dict=ret_d(0))

        Q_map, P_map, visit_count_map = ret_stats(0)
    else:
        for sim in range(N_SIM):
            # initial moves
            for player in [0, 1]:
                valid_mv_map, pol = arch.sess.run(
                    [arch.valid_mv_map, arch.pol], feed_dict=ret_d(player))

                pu.add_valid_mvs(player, valid_mv_map)
                to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                pu.register_mv(player, to_coords)

                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: player,
                                  arch.to_coords_input: to_coords
                              })
            # backup then make next move
            for turn_sim in range(turn, N_TURNS):
                for player in [0, 1]:
                    valid_mv_map, pol, val = arch.sess.run(
                        [arch.valid_mv_map, arch.pol, arch.val],
                        feed_dict=ret_d(player))

                    pu.backup_visit(player, val)

                    pu.add_valid_mvs(player, valid_mv_map)
                    to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                    pu.register_mv(player, to_coords)

                    arch.sess.run(arch.move_frm_inputs,
                                  feed_dict={
                                      arch.moving_player: player,
                                      arch.to_coords_input: to_coords
                                  })

            # backup terminal state
            for player in [0, 1]:
                winner = arch.sess.run(arch.winner, feed_dict=ret_d(player))
                pu.backup_visit(player, winner)

            arch.sess.run(arch.session_restore)
            pu.session_restore()

            if sim % 20 == 0:
                '''Q_map, P_map, visit_count_map = ret_stats(0)
				arch.sess.run(arch.tree_det_move_unit, feed_dict = ret_d(0))
				Q_map_next, P_map_next, visit_count_map_next = ret_stats(1)

				arch.sess.run(arch.session_restore)
				pu.session_restore()

				draw(True)
				pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start))
				'''
                print 'simulation', sim, 'total elapsed time', time.time(
                ) - t_start

        ### make move
        Q_map, P_map, visit_count_map = ret_stats(0)

        valid_mv_map, pol = arch.sess.run(
            [arch.imgs, arch.valid_mv_map, arch.pol], feed_dict=ret_d(0))[1:]

        #########
        pu.add_valid_mvs(player, valid_mv_map)
        visit_count_map = pu.choose_moves(player, pol, CPUCT)[-1]

        to_coords = arch.sess.run(
            [arch.tree_det_visit_coord, arch.tree_det_move_unit],
            feed_dict={
                arch.moving_player: 0,
                arch.visit_count_map: visit_count_map,
                arch.dir_pre: dir_pre,
                arch.dir_a: DIR_A
            })[0]

        pu.register_mv(player, to_coords)

        pu.prune_tree()
        print time.time() - t_start

    return arch.sess.run(arch.gm_vars['board'])[0]
Beispiel #2
0
                to_coords = arch.sess.run(
                    [arch.tree_prob_visit_coord, arch.tree_prob_move_unit],
                    feed_dict={
                        arch.moving_player: player,
                        arch.visit_count_map: visit_count_map,
                        arch.dir_pre: dir_pre,
                        arch.dir_a: DIR_A
                    })[0]  # make move in proportion to visit counts

                pu.register_mv(player, to_coords)  # register move in tree

                ###############

                buffer_loc += gv.BATCH_SZ

            pu.prune_tree()

            if (turn + 1) % 2 == 0:
                print 'finished turn %i (%i sec) %i' % (
                    turn, time.time() - turn_start_t, batch_set)

        ##### create prob maps
        for player in [0, 1]:
            winner[batch_set, :, player] = arch.sess.run(
                arch.winner, feed_dict={arch.moving_player: player})
        tree_probs[batch_set] = pu.return_probs_map(N_TURNS)

        batch_set += 1
        batch_sets_created += 1

    batch_sets_created = N_BATCH_SETS - 1
Beispiel #3
0
def worker(i_WORKER_ID):
    """Run the endless self-play / training loop for one worker process.

    Steps, in order:

    1. Store ``i_WORKER_ID`` in the global ``WORKER_ID`` and load the saved
       state dictionary from ``sdir + save_nm``; its entries are injected
       into module globals via ``exec``.
    2. Build the model on ``'/gpu:<WORKER_ID>'`` and either restore a
       checkpoint (``new_model == False``) or synchronise weights with the
       master worker.
    3. Map the shared ``s_board`` / ``s_winner`` / ``s_tree_probs`` buffers
       as NumPy arrays and allocate per-game local buffers.
    4. Loop forever: play one batch of self-play games (``N_TURNS`` turns,
       both players), copy the results into the shared buffers under
       ``buffer_lock``, and -- on the master worker only -- train, run the
       self-evaluation match, evaluate against the reference opponents,
       log, and save.

    Args:
        i_WORKER_ID: index of this worker; also used as the GPU index and
            compared against ``MASTER_WORKER``.

    Returns:
        Never returns; the main ``while True`` loop has no exit.
    """
    global WORKER_ID, weights_current, weights_eval_current, weights_eval32_current, val_mean_sq_err, pol_cross_entrop_err, val_pearsonr
    global board, winner, tree_probs, save_d, bp_eval_nodes, t_start, run_time, save_nm
    WORKER_ID = i_WORKER_ID

    # running sums for the training-error logs (reset after each log entry)
    err_denom = 0
    val_pearsonr = 0
    val_mean_sq_err = 0
    pol_cross_entrop_err = 0
    t_start = datetime.now()
    run_time = datetime.now() - datetime.now()

    #### restore
    save_d = np.load(sdir + save_nm, allow_pickle=True).item()

    # NOTE(review): this injects every saved key as a module global through
    # exec; names used further down without a visible definition (e.g.
    # global_batch, save_counter, save_t, log, boards, scores) presumably
    # arrive this way and depend on Python 2 `exec` scoping -- confirm.
    for key in save_vars + state_vars + training_ex_vars:
        if (key == 'save_nm') or (key in shared_nms):
            continue
        exec('global ' + key)
        exec('%s = save_d["%s"]' % (key, key))

    EPS_ORIG = EPS
    #EPS = 2e-3 ###################################################### < overrides previous backprop step sizes

    ############# init / load model
    DEVICE = '/gpu:%i' % WORKER_ID
    arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS,
                    MOMENTUM, LSQ_LAMBDA, LSQ_REG_LAMBDA,
                    POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA,
                    L2_LAMBDA)

    # nodes run for each training batch; element 0 (train_step) is discarded
    bp_eval_nodes = [
        arch.train_step, arch.val_mean_sq_err, arch.pol_cross_entrop_err,
        arch.val_pearsonr
    ]

    # ops for trainable weights
    weights_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope='main')
    weights_eval_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             scope='eval/')
    weights_eval32_current = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='eval32')

    if new_model == False:
        print 'restore nm %s' % save_nm
        arch.saver.restore(arch.sess, sdir + save_nm)
        if WORKER_ID == MASTER_WORKER:
            set_all_shared_to_loaded()
    else:  #### sync model weights
        if WORKER_ID == MASTER_WORKER:
            set_all_to_eval32_and_get()
        else:
            while set_weights() == False:  # wait for weights to be set
                continue
    ###### shared variables
    # NumPy views over the shared buffers (no copy is made by frombuffer)
    board = np.frombuffer(s_board.get_obj(), 'float16').reshape(
        (BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels))
    winner = np.frombuffer(s_winner.get_obj(), 'int8').reshape(
        (N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ))
    tree_probs = np.frombuffer(s_tree_probs.get_obj(), 'float32').reshape(
        (BUFFER_SZ, gv.map_szt))

    ######## local variables
    # BUFFER_SZ = N_BATCH_SETS * N_TURNS * 2 * gv.BATCH_SZ
    # local buffers hold exactly one batch set (one full self-play game batch)
    L_BUFFER_SZ = N_TURNS * 2 * gv.BATCH_SZ
    board_local = np.zeros(
        (L_BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels),
        dtype='float16')
    winner_local = np.zeros((N_TURNS, 2, gv.BATCH_SZ), dtype='int8')
    tree_probs_local = np.zeros((L_BUFFER_SZ, gv.map_szt), dtype='float32')

    # only reachable if the EPS override above is un-commented
    if EPS_ORIG != EPS:
        #save_nm += 'EPS_%2.4f.npy' % EPS
        save_d['EPS'] = EPS
        print 'saving to', save_nm

    ### sound
    if WORKER_ID == MASTER_WORKER:
        pygame.init()
        pygame.mixer.music.load('/home/tapa/gtr-nylon22.mp3')

    ######
    while True:
        #### generate training batches with `main` model
        arch.sess.run(arch.init_state)
        pu.init_tree()
        turn_start_t = time.time()
        buffer_loc_local = 0
        for turn in range(N_TURNS):
            ### make move
            for player in [0, 1]:
                set_weights()
                run_sim(turn, player)  # using `main` model

                inds = buffer_loc_local + np.arange(
                    gv.BATCH_SZ)  # inds to save training vars at
                board_local[inds], valid_mv_map, pol = arch.sess.run(
                    [arch.imgs, arch.valid_mv_map, arch.pol['main']],
                    feed_dict=ret_d(player))  # generate batch and valid moves

                #########
                pu.add_valid_mvs(player,
                                 valid_mv_map)  # register valid moves in tree
                visit_count_map = pu.choose_moves(
                    player, np.array(pol, dtype='single'),
                    CPUCT)[-1]  # get number of times each node was visited

                # policy target: visit counts normalised per row
                tree_probs_local[inds] = visit_count_map / visit_count_map.sum(
                    1)[:, np.newaxis]

                to_coords = arch.sess.run(
                    [arch.tree_prob_visit_coord, arch.tree_prob_move_unit],
                    feed_dict={
                        arch.moving_player: player,
                        arch.visit_count_map: visit_count_map
                    })[0]  # make move in proportion to visit counts

                pu.register_mv(player, np.array(
                    to_coords, dtype='int32'))  # register move in tree

                ###############

                buffer_loc_local += gv.BATCH_SZ

            pu.prune_tree(0)

            if (turn + 1) % 2 == 0:
                print 'finished turn %i (%i sec) GPU %i batch_sets_created %i (total %i)' % (
                    turn, time.time() - turn_start_t, WORKER_ID,
                    batch_sets_created.value, batch_sets_created_total.value)

        ##### create prob maps
        for player in [0, 1]:
            winner_local[:, player] = arch.sess.run(
                arch.winner, feed_dict={arch.moving_player: player})

        #### set shared buffers with training variables we just generated from self-play
        with buffer_lock:
            board[buffer_loc.value:buffer_loc.value +
                  buffer_loc_local] = board_local
            tree_probs[buffer_loc.value:buffer_loc.value +
                       buffer_loc_local] = tree_probs_local
            winner[batch_set.value] = winner_local

            buffer_loc.value += buffer_loc_local
            batch_sets_created.value += 1
            batch_sets_created_total.value += 1
            batch_set.value += 1

            # save checkpoint
            # (the write positions wrap to 0 once either shared buffer is full)
            if buffer_loc.value >= BUFFER_SZ or batch_set.value >= N_BATCH_SETS_TOTAL:
                buffer_loc.value = 0
                batch_set.value = 0

                # save batch only
                batch_d = {}
                for key in ['tree_probs', 'winner', 'board']:
                    exec(
                        'batch_d["%s"] = copy.deepcopy(np.array(s_%s.get_obj()))'
                        % (key, key))
                batch_save_nm = sdir + save_nm + '_batches' + str(
                    batch_sets_created_total.value)
                np.save(batch_save_nm, batch_d)
                print 'saved', batch_save_nm
                batch_d = {}

        ################ train/eval/test
        if WORKER_ID == MASTER_WORKER and batch_sets_created.value >= N_BATCH_SETS_BLOCK and batch_sets_created_total.value >= N_BATCH_SETS_MIN:
            ########### train
            with buffer_lock:
                if batch_sets_created_total.value < (
                        N_BATCH_SETS_MIN + N_BATCH_SETS_BLOCK
                ):  # don't overtrain on the initial set
                    batch_sets_created.value = N_BATCH_SETS_BLOCK

                if batch_sets_created.value >= N_BATCH_SETS_TOTAL:  # if for some reason master worker gets delayed
                    batch_sets_created.value = N_BATCH_SETS_BLOCK

                # float32 copies of the shared buffers for training
                board_c = np.array(board, dtype='single')
                winner_rc = np.array(winner.ravel(), dtype='single')

                valid_entries = np.prod(
                    np.isnan(tree_probs) == False, 1) * np.nansum(
                        tree_probs,
                        1)  # remove examples with nans or no probabilities
                inds_valid = np.nonzero(valid_entries)[0]
                print len(
                    inds_valid), 'out of', BUFFER_SZ, 'valid training examples'

                for rep in range(N_REP_TRAIN):
                    random.shuffle(inds_valid)
                    for batch in range(N_TURNS * batch_sets_created.value):
                        inds = inds_valid[batch * gv.BATCH_SZ +
                                          np.arange(gv.BATCH_SZ)]

                        board2, tree_probs2 = pu.rotate_reflect_imgs(
                            board_c[inds], tree_probs[inds]
                        )  # rotate and reflect board randomly

                        train_dict = {
                            arch.imgs32: board2,
                            arch.pol_target: tree_probs2,
                            arch.val_target: winner_rc[inds]
                        }

                        val_mean_sq_err_tmp, pol_cross_entrop_err_tmp, val_pearsonr_tmp = \
                                arch.sess.run(bp_eval_nodes, feed_dict=train_dict)[1:]

                        # update logs
                        val_mean_sq_err += val_mean_sq_err_tmp
                        pol_cross_entrop_err += pol_cross_entrop_err_tmp
                        val_pearsonr += val_pearsonr_tmp
                        global_batch += 1
                        err_denom += 1

                batch_sets_created.value = 0

            ############### `eval` against prior version of self (`main`)
            set_eval16_to_eval32_start_eval(
            )  # update `eval` tf and shared copies to follow backprop (`eval32`)
            eval_model()  # run match(es)
            with eval_stats_lock:
                print '-------------------'
                model_outperforms, self_eval_perc = print_eval_stats()
                print '------------------'
            if model_outperforms:  # update `eval` AND `main` both tf and shared copies to follow backprop
                set_all_to_eval32_and_get()

            ##### network evaluation against random player and GNU Go
            global_batch_evald = global_batch
            global_batch_saved = global_batch
            t_eval = time.time()
            print 'evaluating nn'

            d = ret_d(0)

            ################## monitor training progress:
            # test `eval` against GNU Go and a player that makes only random moves
            # NOTE(review): the game-count pairs are ordered (gnu, random) for
            # 'nn' but (N_EVAL_TREE_GMS, N_EVAL_TREE_GNU_GMS) for 'tree', while
            # the inner zip always pairs the first entry with gnu=True --
            # confirm the 'tree' ordering is intended.
            for nm, N_GMS_L in zip(['nn', 'tree'],
                                   [[N_EVAL_NN_GNU_GMS, N_EVAL_NN_GMS],
                                    [N_EVAL_TREE_GMS, N_EVAL_TREE_GNU_GMS]]):
                for gnu, N_GMS in zip([True, False], N_GMS_L):
                    if N_GMS == 0:
                        continue
                    # key is e.g. 'nn', 'nn_gnu', 'tree', 'tree_gnu'
                    key = '%s%s' % (nm, '' + gnu * '_gnu')
                    t_key = time.time()
                    boards[key] = np.zeros((N_TURNS, ) + gv.INPUTS_SHAPE[:-1],
                                           dtype='int8')
                    n_mvs = 0.
                    win_eval = 0.
                    score_eval = 0.
                    n_captures_eval = np.zeros(2, dtype='single')
                    for gm in range(N_GMS):
                        arch.sess.run(arch.init_state)
                        pu.init_tree()
                        # init gnu state
                        if gnu:
                            gt.init_board(arch.sess.run(arch.gm_vars['board']))

                        for turn in range(N_TURNS):
                            board_tmp = arch.sess.run(arch.gm_vars['board'])

                            #### search / make move
                            if nm == 'tree':
                                # NOTE(review): the tree path is deliberately
                                # aborted by the assert below, and run_sim is
                                # called here with one argument although the
                                # self-play loop passes (turn, player).
                                run_sim(turn)
                                assert False
                            else:
                                # prob choose first move, deterministically choose remainder
                                if turn == 0:
                                    to_coords = arch.sess.run([
                                        arch.
                                        nn_prob_to_coords_valid_mvs['eval'],
                                        arch.
                                        nn_prob_move_unit_valid_mvs['eval']
                                    ],
                                                              feed_dict=d)[0]
                                else:
                                    to_coords = arch.sess.run([
                                        arch.nn_max_prob_to_coords_valid_mvs[
                                            'eval'], arch.
                                        nn_max_prob_move_unit_valid_mvs['eval']
                                    ],
                                                              feed_dict=d)[0]

                            board_tmp2 = arch.sess.run(arch.gm_vars['board'])
                            # accumulates the drop in the board sum caused by the move
                            n_mvs += board_tmp.sum() - board_tmp2.sum()

                            # move opposing player
                            if gnu:
                                gt.move_nn(to_coords)

                                # mv gnugo
                                ai_to_coords = gt.move_ai()
                                arch.sess.run(
                                    arch.imgs,
                                    feed_dict={arch.moving_player: 1})
                                arch.sess.run(
                                    arch.nn_max_move_unit['eval'],
                                    feed_dict={
                                        arch.moving_player: 1,
                                        arch.nn_max_to_coords['eval']:
                                        ai_to_coords
                                    })
                            else:
                                arch.sess.run(arch.imgs, feed_dict=ret_d(1))
                                arch.sess.run(arch.move_random_ai,
                                              feed_dict=ret_d(1))

                            boards[key][turn] = arch.sess.run(
                                arch.gm_vars['board'])

                            if nm == 'tree':
                                pu.prune_tree(0)
                            # turn

                        # save stats
                        win_tmp, score_tmp, n_captures_tmp = arch.sess.run(
                            [arch.winner, arch.score, arch.n_captures],
                            feed_dict={arch.moving_player: 0})
                        scores[key] = copy.deepcopy(score_tmp)

                        win_eval += win_tmp.mean()
                        score_eval += score_tmp.mean()
                        n_captures_eval += n_captures_tmp.mean(1)
                        # gm

                    # log
                    # mean winner value halved and shifted by .5 before logging
                    log['win_' + key].append((win_eval /
                                              (2 * np.single(N_GMS))) + .5)
                    log['n_captures_' + key].append(n_captures_eval[0] /
                                                    np.single(N_GMS))
                    log['n_captures_opp_' + key].append(n_captures_eval[1] /
                                                        np.single(N_GMS))
                    log['score_' + key].append(score_eval / np.single(N_GMS))
                    log['n_mvs_' + key].append(
                        n_mvs / np.single(N_GMS * N_TURNS * gv.BATCH_SZ))

                    log['boards_' + key].append(boards[key][-1])
                    print key, 'eval time', time.time() - t_key
                    # gnu
                # nm
            log['eval_batch'].append(global_batch)
            print 'eval time', time.time() - t_eval
            # eval
            ####################### end network evaluation

            pol, pol_pre = arch.sess.run(
                [arch.pol['eval'], arch.pol_pre['eval']],
                feed_dict={arch.moving_player: 0})

            ##### log
            log['val_mean_sq_err'].append(val_mean_sq_err / err_denom)
            log['pol_cross_entrop'].append(pol_cross_entrop_err / err_denom)
            log['val_pearsonr'].append(val_pearsonr / err_denom)
            log['opt_batch'].append(global_batch)

            log['pol_max_pre'].append(np.median(pol_pre.max(1)))
            log['pol_max'].append(np.median(pol.max(1)))

            log['self_eval_win_rate'].append(
                np.single(eval_games_won.value) /
                (eval_batch_sets_played.value * gv.BATCH_SZ))
            log['model_promoted'].append(model_outperforms)

            log['self_eval_perc'].append(self_eval_perc)

            # reset the running error sums for the next training block
            val_mean_sq_err = 0
            pol_cross_entrop_err = 0
            val_pearsonr = 0
            err_denom = 0

            ########## print
            run_time += datetime.now() - t_start

            # periodic header line
            if (save_counter % 20) == 0:
                print
                print Style.BRIGHT + Fore.GREEN + save_nm, Fore.WHITE + 'EPS', EPS, 'start', str(start_time).split('.')[0], 'run time', \
                  str(run_time).split('.')[0]
                print
            save_counter += 1

            print_str = '%i' % global_batch
            for key in print_logs:
                print_str += ' %s ' % key
                # NOTE(review): log[key] is indexed with [-1] on the next
                # lines, so this isinstance test on the container itself
                # presumably never succeeds; it likely meant log[key][-1].
                if isinstance(log[key], int):
                    print_str += str(log[key][-1])
                else:
                    print_str += '%1.4f' % log[key][-1]

            print_str += ' %4.1f' % (datetime.now() - t_start).total_seconds()
            print print_str

            t_start = datetime.now()

            # play sound
            # (only when this marker file exists)
            if os.path.isfile('/home/tapa/play_sound.txt'):
                pygame.mixer.music.play()

        ############# save
        if WORKER_ID == MASTER_WORKER:
            with buffer_lock:
                # update state vars
                #shared_nms = ['buffer_loc', 'batch_sets_created', 'batch_set', 's_board', 's_winner', 's_tree_probs', 'weights_changed', 'buffer_lock', 'weights_lock', 'save_nm', 'new_model', 'weights']
                for key in state_vars + training_ex_vars:
                    if key in [
                            'buffer_loc', 'batch_sets_created',
                            'batch_sets_created_total', 'batch_set',
                            'eval_games_won', 'eval_batch_sets_played'
                    ]:
                        # shared counters: store the plain value
                        exec('save_d["%s"] = %s.value' % (key, key))
                    elif key in ['tree_probs', 'winner', 'board']:
                        # shared arrays: store a private copy
                        exec(
                            'save_d["%s"] = copy.deepcopy(np.array(s_%s.get_obj()))'
                            % (key, key))
                    else:
                        exec('save_d["%s"] = %s' % (key, key))

            # always overwrite the rolling save; additionally write a
            # timestamped copy once CHKP_FREQ seconds have passed
            save_nms = [save_nm]
            if (datetime.now() - save_t).seconds > CHKP_FREQ:
                save_nms += [save_nm + str(datetime.now())]
                save_t = datetime.now()

            for nm in save_nms:
                np.save(sdir + nm, save_d)
                arch.saver.save(arch.sess, sdir + nm)

            # NOTE(review): this print sits outside the loop above, so only
            # the last saved name is reported.
            print sdir + nm, 'saved'
def nn_mv():
    """Make the network's move for ``NET_PLAYER`` and return the first board.

    The TensorFlow game state is backed up first.  Then:

    * ``run_one_pass_only`` truthy: the policy is computed from
      ``arch.pol[net]`` (with the images fed through ``arch.imgs32``), fed
      into ``arch.pol['eval']`` and the 'eval' movement ops make the move
      (``nn_prob_...`` on turn 0, ``nn_max_prob_...`` afterwards).
    * otherwise: tree-search simulations run until more than ``TIME_MIN``
      seconds have elapsed and at least ``SIM_MIN`` simulations are done.
      Every 20th simulation the current statistics are drawn on screen.
      The final move is made with the ``nn_max_prob_...`` ops fed with the
      visit counts in place of the policy, registered in the tree, and the
      tree is pruned.

    Rebinds the globals listed in the ``global`` statements below.

    Returns:
        Element 0 of ``arch.gm_vars['board']`` after the move.
    """
    global Q_map, P_map, visit_count_map, valid_mv_map, pol
    global Q_map_next, P_map_next, visit_count_map_next, to_coords

    t_start = time.time()
    arch.sess.run(arch.session_backup)

    #### make most probable mv, do not use tree search
    if run_one_pass_only:
        # 'eval32' movement ops were not defined, so get policy, from network, and then use the ops in 'eval' (where it was defined)
        d = ret_d(NET_PLAYER)
        imgs = arch.sess.run(arch.imgs, feed_dict=d)
        d[arch.imgs32] = np.asarray(imgs, dtype='float')
        pol = arch.sess.run(arch.pol[net], feed_dict=d)
        # fresh feed dict: only the computed policy is injected into 'eval'
        d = ret_d(NET_PLAYER)
        d[arch.pol['eval']] = pol

        if turn == 0:
            arch.sess.run(arch.nn_prob_move_unit_valid_mvs['eval'],
                          feed_dict=d)
        else:
            arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs['eval'],
                          feed_dict=d)

        #Q_map, P_map, visit_count_map = ret_stats(0)

    ##### use tree search
    else:
        #pu.init_tree()
        pu.session_backup()

        sim = 0
        # each loop is one simulation
        while True:
            # stop only when BOTH the time budget and the minimum simulation
            # count have been reached
            if ((time.time() - t_start) > TIME_MIN) and (sim >= SIM_MIN):
                break

            # backup then make next move
            # (this loop, iterates over one full game-play from present turn)
            for turn_sim in range(turn, np.max(
                (N_TURNS + 1, turn + TURN_MIN))):
                for player in [0, 1]:
                    if turn_sim == turn and human_player(
                    ) == 0 and player == 0:  # skip player 0 (human), has already moved
                        continue

                    # get valid moves, network policy and value estimates:
                    valid_mv_map, pol, val = arch.sess.run(
                        [arch.valid_mv_map, arch.pol[net], arch.val[net]],
                        feed_dict=ret_d(player))

                    # backup visit Q values
                    # (skipped on the first simulated turn)
                    if turn_sim != turn:
                        pu.backup_visit(player, np.array(val, dtype='single'))

                    pu.add_valid_mvs(
                        player, valid_mv_map)  # register valid moves in tree
                    to_coords = pu.choose_moves(
                        player, np.array(pol, dtype='float32'), CPUCT
                    )[0]  # choose moves based on policy and Q values (latter of which already stored in tree)

                    pu.register_mv(player, np.array(
                        to_coords, dtype='int32'))  # register move in tree
                    arch.sess.run(arch.move_frm_inputs,
                                  feed_dict={
                                      arch.moving_player: player,
                                      arch.to_coords_input: to_coords
                                  })  # move network (update GPU vars)

            # backup terminal state
            # (player 1 is backed up with the negated winner value)
            winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)),
                              dtype='single')
            pu.backup_visit(0, winner)
            pu.backup_visit(1, -winner)

            # return move to previous node in tree
            arch.sess.run(arch.session_restore)  # reset gpu game state
            pu.session_restore()  # reset cpu tree state

            ######################
            # print stats from tree
            if sim % 20 == 0:
                # get valid moves, network policy and value estimates:
                valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map],
                                             feed_dict=ret_d(NET_PLAYER))[1]
                pu.add_valid_mvs(NET_PLAYER,
                                 valid_mv_map)  # register valid moves in tree

                # NOTE(review): `pol` here is whatever the last simulation
                # step left behind, not a policy re-computed for the restored
                # position (the final "make move" section below does
                # re-compute it) -- confirm this is intended for the preview.
                visit_count_map_128 = pu.choose_moves(
                    NET_PLAYER, np.array(pol, dtype='float32'), CPUCT
                )[-1]  # to feed back into tf (entries for all 128 games, not just 1)
                Q_map, P_map, visit_count_map = ret_stats(
                    NET_PLAYER)  # stats we will show on screen

                # move the network to what it currently estimates is its best move
                to_coords = arch.sess.run(
                    [
                        arch.nn_max_prob_to_coords_valid_mvs[net],
                        arch.nn_max_prob_move_unit_valid_mvs[net]
                    ],
                    feed_dict={
                        arch.moving_player: NET_PLAYER,
                        arch.pol[net]: visit_count_map_128
                    })[0]

                pu.register_mv(NET_PLAYER, np.asarray(
                    to_coords, dtype='int32'))  # register move in tree
                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: NET_PLAYER,
                                  arch.to_coords_input: to_coords
                              })  # move network (update GPU vars)

                # get network tree estimates as to where it thinks you will move after it moves
                valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map],
                                             feed_dict=ret_d(
                                                 human_player()))[1]
                pu.add_valid_mvs(human_player(),
                                 valid_mv_map)  # register valid moves in tree

                Q_map_next, P_map_next, visit_count_map_next = ret_stats(
                    human_player())

                arch.sess.run(
                    arch.session_restore)  # restore prior tf game state
                pu.session_restore()  # restore prior tree

                draw(True)
                pygame.display.set_caption('%i %2.1f' %
                                           (sim, time.time() - t_start))

                print 'simulation: ', sim, ' (%i sec)' % (time.time() -
                                                          t_start)

            sim += 1

        ### make move

        # first get valid moves and current policy at board position
        valid_mv_map, pol = arch.sess.run(
            [arch.imgs, arch.valid_mv_map, arch.pol[net]],
            feed_dict=ret_d(NET_PLAYER))[1:]
        pu.add_valid_mvs(NET_PLAYER, valid_mv_map)  # set in tree

        visit_count_map_128 = pu.choose_moves(
            NET_PLAYER, np.array(pol, dtype='float32'), CPUCT
        )[-1]  # to feed back into tf (entries for all 128 games, not just 1)
        Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER)

        # makes moves as if this were still part of the self-play (max visit count)
        #to_coords = arch.sess.run([arch.tree_det_visit_coord, arch.tree_det_move_unit], feed_dict={arch.moving_player: 0,
        #				arch.visit_count_map: visit_count_map})[0]

        # move to max visited node:
        # (the visit counts are fed in place of the policy output)
        #if turn != 0:
        to_coords = arch.sess.run([
            arch.nn_max_prob_to_coords_valid_mvs[net],
            arch.nn_max_prob_move_unit_valid_mvs[net]
        ],
                                  feed_dict={
                                      arch.moving_player: NET_PLAYER,
                                      arch.pol[net]: visit_count_map_128
                                  })[0]

        # randomly move proportionately to visit counts
        #else:
        #	to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: 0,
        #			arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts

        pu.register_mv(NET_PLAYER, np.array(to_coords, dtype='int32'))

        print 'pruning...'
        pu.prune_tree(
            1)  # 0: prune all games in batch, 1: prune only first game
        print time.time() - t_start

    print 'finished'
    return arch.sess.run(arch.gm_vars['board'])[0]
Beispiel #5
0
def eval_model():
    set_weights()

    while True:
        arch.sess.run(arch.init_state)
        pu.init_tree()
        turn_start_t = time.time()

        ### choose order
        with eval_stats_lock:
            if scope_next.value == 0:
                scopes = ['main', 'eval']
            else:
                scopes = ['eval', 'main']

            scope_next.value = 1 - scope_next.value

        scopes = np.asarray(scopes)

        for turn in range(N_TURNS):
            ### make move
            for player, s in zip([0, 1], scopes):
                if eval_batch_sets_played.value >= (2 * N_GATE_BATCH_SETS):
                    return  # finished

                run_sim(turn, player, scopes=scopes)

                valid_mv_map, pol = arch.sess.run(
                    [arch.valid_mv_map, arch.pol[s]],
                    feed_dict=ret_d(player))  # generate batch and valid moves

                #########
                pu.add_valid_mvs(player,
                                 valid_mv_map)  # register valid moves in tree
                visit_count_map = pu.choose_moves(
                    player, np.array(pol, dtype='single'),
                    CPUCT)[-1]  # get number of times each node was visited

                to_coords = arch.sess.run(
                    [arch.tree_prob_visit_coord, arch.tree_prob_move_unit],
                    feed_dict={
                        arch.moving_player: player,
                        arch.visit_count_map: visit_count_map
                    })[0]  # make move in proportion to visit counts

                pu.register_mv(player, np.array(
                    to_coords, dtype='int32'))  # register move in tree

            pu.prune_tree(0)

            if (turn + 1) % 2 == 0:
                print 'eval finished turn %i (%i sec) GPU %i eval_batch_sets_played %i' % (
                    turn, time.time() - turn_start_t, WORKER_ID,
                    eval_batch_sets_played.value)

        with eval_stats_lock:
            # do not add any more stats for these conditions
            if eval_batch_sets_main_first.value >= N_GATE_BATCH_SETS and scopes[
                    0] == 'main':
                continue
            if (eval_batch_sets_played.value - eval_batch_sets_main_first.value
                ) >= N_GATE_BATCH_SETS and scopes[0] == 'eval':
                continue

            eval_player = np.nonzero(scopes == 'eval')[0][0]
            res = arch.sess.run(arch.winner,
                                feed_dict={arch.moving_player: eval_player})
            print 'ties', (res == 0).sum(), 'wins', (
                res == 1).sum(), 'rate %2.3f' % (
                    (res == 1).sum() / np.single(gv.BATCH_SZ)), 'opp wins', (
                        res == -1).sum(), scopes
            eval_games_won.value += np.int((res == 1).sum())
            eval_batch_sets_played.value += 1
            eval_batch_sets_main_first.value += int(scopes[0] == 'main')
            print_eval_stats()