Ejemplo n.º 1
0
 def nnfunc(game):
     """Evaluate `game` through `nncli` and return ``(pw, move_logits)``.

     The network inputs are rotated by `rot` before evaluation: a random
     value in 0..3 when ``self.random_rotation`` is truthy, otherwise 0.
     The policy array returned by the evaluator is passed through
     ``naf.rotate_policy_array`` with the same `rot` before being returned
     (presumably to map it back to the unrotated board -- confirm in naf).
     """
     nips = naf.NetInputs(game)
     rot = random.randint(0, 3) if self.random_rotation else 0
     nips.rotate(rot)
     pw, ml_r = nncli.eval(nips)
     # A float32 scalar is used as-is; anything else must be a 3-element
     # array, which naf.three_to_one collapses to a single value.
     if not isinstance(pw, numpy.float32):
         assert pw.shape[0] == 3
         pw = naf.three_to_one(pw)
     ml_0 = naf.rotate_policy_array(ml_r, rot)
     return pw, ml_0
Ejemplo n.º 2
0
        def set_reply(reply):
            """Decode an evaluation reply and mark the enclosing `leaf` finished.

            `reply` is a buffer of float32 values.  Two layouts are accepted,
            where ``nml = SIZE * (SIZE - 2)``:

            * ``nml + 1`` values: one score followed by the move logits;
            * ``nml + 3`` values: three values reduced by
              ``naf.three_to_one`` followed by the move logits.

            The logits are passed through ``naf.rotate_policy_array`` with the
            enclosing `rot`, normalised over the entries selected by
            ``leaf.LMnz`` and stored in ``leaf.P``.  The leaf is then appended
            to ``self.finished_leaves`` and ``self.leaves_waiting`` is
            decremented.

            Raises:
                TypeError: if the reply has neither of the accepted lengths.
            """
            p0 = numpy.frombuffer(reply, dtype=numpy.float32)
            nml = twixt.Game.SIZE * (twixt.Game.SIZE - 2)

            if p0.shape[0] == nml + 1:
                leaf.score = p0[0]
                movelogits = naf.rotate_policy_array(p0[1:], rot)
            elif p0.shape[0] == nml + 3:
                leaf.score = naf.three_to_one(p0[0:3])
                movelogits = naf.rotate_policy_array(p0[3:], rot)
            else:
                # Format the shape into the message; passing it as a second
                # exception argument renders the whole message as a tuple.
                raise TypeError("Bad shape: %s" % (p0.shape,))

            # Softmax restricted to the leaf.LMnz entries; subtracting the
            # maximum selected logit keeps exp() from overflowing.
            maxlogit = movelogits[leaf.LMnz].max()
            el = numpy.exp(movelogits - maxlogit)
            divisor = el[leaf.LMnz].sum()
            leaf.P = el / divisor
            self.finished_leaves.append(leaf)
            assert self.leaves_waiting > 0
            self.leaves_waiting -= 1
Ejemplo n.º 3
0
 def nnfunc(game):
     """Run `game` through `nneval_` and return ``(pw, move_logits)``.

     When ``self.random_rotation`` is truthy the inputs are rotated by a
     random amount in 0..3 before evaluation; otherwise they are left
     untouched and the rotation is 0.  A three-element `pw` is collapsed
     with ``naf.three_to_one``, and the move logits are passed through
     ``naf.rotate_policy_array`` with the rotation that was used.
     """
     net_inputs = naf.NetInputs(game)
     rotation = 0
     if self.random_rotation:
         rotation = random.randint(0, 3)
         net_inputs.rotate(rotation)

     win_value, move_logits = nneval_.eval_one(net_inputs)
     if len(win_value) == 3:
         win_value = naf.three_to_one(win_value)

     unrotated_logits = naf.rotate_policy_array(move_logits, rotation)
     return win_value, unrotated_logits
Ejemplo n.º 4
0
def collect_one_train_state():
    """Return one randomly selected, randomly rotated training state.

    Repeatedly asks `selector` for a random item, reads the fixed-size
    record at index `y` from the item's open file and decodes it with
    ``naf.LearningState.from_bytes``.  Records that fail to decode
    (ValueError) are reported and skipped, as are records whose ``N`` array
    has no truthy entry.  The first acceptable record has its inputs and
    its ``N`` array rotated by the same random amount in 0..3 and is
    returned.
    """
    while True:
        _bnum, y, fi = selector.random_item()
        assert 0 <= y < fi.count, (y, fi)
        fi.f.seek(y * naf.LearningState.NUM_BYTES)
        b = fi.f.read(naf.LearningState.NUM_BYTES)
        assert len(b) == naf.LearningState.NUM_BYTES
        try:
            ts = naf.LearningState.from_bytes(b, "%s:%d" % (fi.name, y))
        except ValueError:
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("Errored on %s:%d!!" % (fi.name, y))
            continue

        if ts.N.any():
            # Rotate the inputs and the N array by the same amount so they
            # stay aligned with each other.
            r = random.randint(0, 3)
            ts.nips.rotate(r)
            ts.N = naf.rotate_policy_array(ts.N, r)
            return ts
Ejemplo n.º 5
0
thinker_moves = []

# Play each comma-separated move given in args.moves onto the game, in order.
if args.moves:
    for m in args.moves.split(','):
        game.play(m)

if args.nneval:
    import naf
    import nneval
    import random
    ROT = random.randint(0,3)
    ne = nneval.NNEvaluater(args.nneval)
    nips = naf.NetInputs(game)
    nips.rotate(ROT)
    pw, ml = ne.eval_one(nips)
    ml = naf.rotate_policy_array(ml[0,:], ROT)
    print "pwin=",pw

    LM = naf.legal_move_policy_array(game)
    LMnz = LM.nonzero()
    max_ml = ml[LMnz].max()
    el = numpy.exp(ml - max_ml)
    divisor = el[LMnz].sum()
    P = el / divisor
    # print P
    P[(1-LM).nonzero()] = 0
    inds = numpy.argsort(P)
    for idx, i in enumerate(reversed(inds[-40:])):
        coord = naf.policy_index_point(game, i)
        possible_moves.append(coord)
        if idx<10: