Example #1
def genmoves(logfn, game):
    moves = {}
    while True:
        env = yield moves
        moves = {}

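        # any ant standing beside water steps to a random open neighbor;
        # the rest are presumably filled in by env.supplement below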
        for aI, (aN, _) in env.myid.iteritems():
            neighbors = antmath.neighbors(aN)
            walls = set(n for n in neighbors if n in env.water)
            if walls:
                ways = list(set(neighbors) - walls)
                moves[aI] = antmath.loc_displacement(aN, random.choice(ways))

        moves = env.supplement(moves)
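
Both examples lean on an antmath helper module that is not included here.
Judging only from the call sites (the real module may well differ), it is
assumed to provide four-way neighbors and a single-step displacement between
adjacent locations, roughly like this:

def neighbors(loc):
    # the four orthogonally adjacent (row, col) locations
    row, col = loc
    return [(row - 1, col), (row, col + 1), (row + 1, col), (row, col - 1)]

def loc_displacement(old, new):
    # single-step direction from old to new: 'N', 'E', 'S', 'W' or '='
    # (assumes north means a smaller row index and that the pair is already
    # unwrapped, as env.unwrap is called before this in Example #2)
    (orow, ocol), (nrow, ncol) = old, new
    if nrow < orow:
        return 'N'
    if nrow > orow:
        return 'S'
    if ncol > ocol:
        return 'E'
    if ncol < ocol:
        return 'W'
    return '='
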
Example #2
def meta(logfn, game):
    vectors = list('NESW=')

    # experts (generators)
    experts = [e.genmoves(logfn, game) for e in EXPERTS]
    enames = [e.__name__ for e in EXPERTS]
    logfn and logfn('# ' + ' '.join(n[n.find('.') + 1:] for n in \
                                    enames + ['ant-count']),
                    noprefix=True)
    for e in experts:
        e.next() # coroutine warmup

    # learner (generator)
    # first argument is beta: 0.9 learns slowly and 0.1 learns quickly
    hedge = Hedge(0.25, len(experts))
    faith = hedge.next()
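    # faith[i] is the current probability mass on expert i; it always sums to 1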

    # loop
    env = environment.LazyEnvDigest((0, 0), 0, None) # for warmup
    while True:

        logfn and logfn(' '.join(str(f) for f in \
                                 faith + [len(env.myant)]), noprefix=True)

        # query each strategy
        # eg:
        #   moves = [{1:'N', 2:'E',        5:'='},
        #            {1:'N', 2:'S', 4:'S', 5:'W'}]

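        # apparently a speed hack: with 250 or more ants, drop food targets
        # for this turn so the experts have less work to do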
        if len(env.myant) >= 250:
            env.food.clear()
        moves = [e.send(env) for e in experts]

        # make sure ants have orders from at least one strategy
        #assert all(env.myid - m.viewkeys() == set() for m in moves)

        # combine the move recommendations into distributions
        # eg:
        #   faith = [0.8, 0.2]
        # result:
        #   lincomb = {1: {'N':1.0, 'E':0.0, 'S':0.0, 'W':0.0, '=':0.0},
        #              2: {'N':0.0, 'E':0.8, 'S':0.2, 'W':0.0, '=':0.0},
        #              4: {'N':0.0, 'E':0.0, 'S':0.2, 'W':0.0, '=':0.0},
        #              5: {'N':0.0, 'E':0.0, 'S':0.0, 'W':0.2, '=':0.8}}
        #
        # actually, each probability is really a 2-tuple in which the second
        # part is a list of the strategy indexes that output that vector
        # eg:
        #   lincomb = {1: {'N':(1.0, [0, 1]), ...
        #
        # this way we blame strategy 0 and strategy 1 when moving ant 1 north
        # causes it to die
        #
        # you'll notice in the example that ant 4's entry doesn't sum to one
        # because strategy 0 didn't recommend anything for it -- this is
        # compensated for in 'distpicker'

        lincomb = {aI: {vect: (fsum(f for f, m in it.izip(faith, moves) \
                                    if aI in m and m[aI] == vect),
                               [i for i, m in enumerate(moves) \
                                if aI in m and m[aI] == vect]
                               ) \
                        for vect in vectors} \
                   for aI in env.myid}

        # make a probabilistic generator for each ant's decision-vector
        # get a new environment

        oldfood = env.food.copy()
        env = yield {env.myid[aI][0]: distpicker(vd) \
                     for aI, vd in lincomb.iteritems()}

        # last-minute hack: don't process more than 100 ants each turn;
        # unfortunately this bot is not efficient and frequently times out
        # without it

        keys = env.myant.keys()
        while len(env.myant) > 100:
            k = random.choice(keys)
            keys.remove(k)
            env.myant.pop(k)

        # assign loss to strategies according to how the ants fared

        # for each old-ant:
        #   did any strategy tell the ant to do what it did?
        #     Tr> is the ant still alive?
        #           Tr> did the action result in good things?
        #                 Tr> NO LOSS (0)
        #                 Fa> MINOR LOSS (0.1)
        #           Fa> MAJOR LOSS (1)

        loss = [[] for e in experts]

        for aI, vd in lincomb.iteritems():
            alive = aI in env.myid
            try:
                aN, aO = env.myid[aI] if alive else env.mydeadid[aI]
            except KeyError:
                continue
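            # direction the ant actually moved last turn (unwrap undoes any
            # map wraparound between its old and new locations)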
            move = antmath.loc_displacement(aO, env.unwrap(aO, aN))
            prob, blame = vd[move] if move in vd else (None, None)
            if prob:
                # determine loss for this ant
                als = (0.0 if tookfood(oldfood, env, aN) else 0.1) \
                      if alive else 1.0
                for i in blame:
                    loss[i].append(als)

        # sum loss by strategy and apply to hedge

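        # each strategy's loss: the sum over its blamed ants, normalized by
        # the number of ants that had orders, so it stays in [0, 1]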
        faith = hedge.send([fsum(l) / len(lincomb) if lincomb else 0.0 \
                            for l in loss])
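
Two helpers used above are not shown. The Hedge learner is driven as a
coroutine -- next() returns the initial faith vector, send(losses) returns an
updated one -- which matches the classic multiplicative-weights Hedge
algorithm, so a minimal sketch might look like the following (an assumption,
not the author's actual class):

def Hedge(beta, n):
    # start with equal faith in all n experts
    weights = [1.0] * n
    while True:
        total = sum(weights)
        # yield the normalized weights; the caller sends back a loss in
        # [0, 1] for each expert
        losses = yield [w / total for w in weights]
        # multiplicative update: an expert with loss l keeps beta ** l of its
        # weight, so beta near 0.9 adapts slowly and beta near 0.1 quickly
        weights = [w * beta ** l for w, l in zip(weights, losses)]

With beta = 0.25, an expert that takes the maximum loss of 1.0 on a turn
keeps only a quarter of its weight, so faith shifts quickly toward the
strategies whose ants survive and eat.

distpicker is the other missing piece. One plausible reading of "make a
probabilistic generator for each ant's decision-vector", and of the note that
under-full distributions are compensated for, is to renormalize each ant's
distribution and sample one direction from it; the real helper may instead
return a generator or treat the missing mass differently:

import random

def distpicker(vd):
    # vd maps each vector 'N', 'E', 'S', 'W', '=' to a (probability, blame)
    # pair; sample a vector in proportion to the probabilities
    total = sum(p for p, _ in vd.itervalues()) or 1.0
    roll = random.random() * total
    for vect, (p, _) in vd.iteritems():
        roll -= p
        if roll <= 0.0:
            return vect
    return '='  # numerical slack: default to holding still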