Example #1
0
    def __init__(self, game_info, verbose=False, fast_reset=False):
        """Create a game master for the game described by ``game_info``.

        Keeps the flags passed in, takes a state machine from ``game_info``
        and allocates the joint move and base state that are reused
        internally.  ``scores`` is only updated after a game is finished.
        """
        self.game_info = game_info
        self.verbose = verbose
        self.fast_reset = fast_reset

        # the symbol factory is used to convert to base state
        self.symbol_factory = SymbolFactory()

        self.sm = self.game_info.get_sm()
        self.game = self.game_info.game
        self.match_id = None

        # a joint move / basestate are stored internally
        self.joint_move = self.sm.get_joint_move()
        self.next_basestate = self.sm.new_base_state()

        # XXX we really shouldn't need to do this... why not just use model??? XXX
        base_tuples = []
        for base_index in range(self.next_basestate.len()):
            symbols = self.symbol_factory.to_symbols(
                game_info.model.bases[base_index])
            base_tuples.append(tuple(symbols)[0])
        self.bases = base_tuples

        self.players = []
        self.players_map = {}

        # updated after game is finished
        self.scores = {}

        if verbose:
            log.info("GAMEMASTER: create a gamemaster for game %s" % self.game)

        self.matches = None
Example #2
0
    def on_meta_gaming(self, finish_time):
        """Prepare the player for a match.

        When there is no state machine yet, or the configured generation
        contains "*", a state machine is taken from the match's game info,
        the network for the configured generation is loaded and a new poller
        is created.  The index of the "noop" action of each of the two roles
        is cached as well.  In every case the poller is reset with the
        match's game depth.

        Raises:
            AssertionError: if a role has no action containing "noop".
        """
        if self.conf.verbose:
            log.info("PUCTPlayer, match id: %s" % self.match.match_id)

        if self.sm is None or "*" in self.conf.generation:
            if "*" in self.conf.generation:
                log.warning("Using recent generation %s" %
                            self.conf.generation)

            game_info = self.match.game_info
            self.sm = game_info.get_sm()

            man = get_manager()
            gen = self.conf.generation

            self.nn = man.load_network(game_info.game, gen)
            self.poller = PlayPoller(self.sm, self.nn, attr.asdict(self.conf))

            def get_noop_idx(actions):
                for idx, a in enumerate(actions):
                    if "noop" in a:
                        return idx
                # raise explicitly: an `assert False` is stripped when python
                # runs with -O, which would silently return None here.
                raise AssertionError("did not find noop")

            # unpacking into two names relies on the game having two roles
            self.role0_noop_legal, self.role1_noop_legal = map(
                get_noop_idx, game_info.model.actions)

        self.poller.player_reset(self.match.game_depth)
Example #3
0
    def on_next_move(self, finish_time):
        """Search from the match's current state and return the chosen move.

        The lead role is the one whose opponent only has its noop legal.  The
        playout budget depends on whether we are the lead role.  The
        probability and node count reported by the poller are stored on the
        player.
        """
        log.info("PUCTPlayer.on_next_move(), %s" % self.get_name())
        current_state = self.match.get_current_state()
        self.sm.update_bases(current_state)

        role0_only_noop = (
            self.sm.get_legal_state(0).get_count() == 1 and
            self.sm.get_legal_state(0).get_legal(0) == self.role0_noop_legal)

        if role0_only_noop:
            lead_role_index = 1
        else:
            # then it has to be role 1 that is stuck with its noop
            assert (self.sm.get_legal_state(1).get_count() == 1
                    and self.sm.get_legal_state(1).get_legal(0)
                    == self.role1_noop_legal)
            lead_role_index = 0

        we_are_lead = lead_role_index == self.match.our_role_index
        max_iterations = (self.conf.playouts_per_iteration if we_are_lead
                          else self.conf.playouts_per_iteration_noop)

        current_state = self.match.get_current_state()
        state_ptr = basestate_to_ptr(current_state)

        self.poller.player_move(state_ptr, max_iterations, finish_time)
        self.poller.poll_loop()

        result = self.poller.player_get_move(self.match.our_role_index)
        move, prob, node_count = result

        self.last_probability = prob
        self.last_node_count = node_count
        return move
Example #4
0
def test_with_database():
    """Look up connectFour by its gdl and check repeated lookups are consistent."""
    gdl_str = get_gdl_for_game("connectFour")

    mapping, info = lookup.by_gdl(gdl_str)
    assert mapping is None
    assert info.game == "connectFour"

    sm = info.get_sm()

    # every further lookup returns the same info, but a new valid statemachine
    for _ in range(10):
        again_mapping, again_info = lookup.by_gdl(gdl_str)
        fresh_sm = again_info.get_sm()

        assert again_mapping is None
        assert again_info is info
        assert fresh_sm != sm
        assert id(fresh_sm) != id(sm)
        assert fresh_sm.get_initial_state() == sm.get_initial_state()
        interface.dealloc_statemachine(fresh_sm)

    # finally run rollouts in c++ on the original sm
    log.info("Testing sm %s" % sm)
    result = interface.depth_charge(sm, 1)
    msecs_taken, rollouts, _ = result

    rollouts_per_msec = rollouts / float(msecs_taken)
    rollouts_per_second = rollouts_per_msec * 1000
    log.info("c++ rollouts per second %.2f" % rollouts_per_second)
Example #5
0
def test_not_in_database():
    """Look up a small ad hoc game by its gdl and run rollouts on it."""
    gdl_text = """
  (role white)
  (role black)

  (init o1)

  (legal white a)
  (legal white b)
  (legal black a)

  (<= (next o2) (does white a) (true o1))
  (<= (next o3) (does white b) (true o1))

  (<= (goal white 0) (true o1))
  (<= (goal white 10) (true o2))
  (<= (goal white 90) (true o3))

  (<= (goal black 0) (true o1))
  (<= (goal black 90) (true o2))
  (<= (goal black 10) (true o3))

  (<= terminal (true o2))
  (<= terminal (true o3))
    """

    _mapping, game_info = lookup.by_gdl(gdl_text)
    assert game_info.game == "unknown"
    sm = game_info.get_sm()

    # run rollouts in c++
    result = interface.depth_charge(sm, 1)
    msecs_taken, rollouts, _ = result

    rollouts_per_msec = rollouts / float(msecs_taken)
    rollouts_per_second = rollouts_per_msec * 1000
    log.info("c++ rollouts per second %.2f" % rollouts_per_second)
Example #6
0
    def save_sample_data(self):
        """Write the accumulated samples to the gzipped json sample file.

        Nothing is written while training is in progress; in that case a
        warning is logged and None is returned.  Otherwise the
        GenerationSamples object that was written is returned.
        """
        if self.training_in_progress:
            log.warning("skip writing json (gzipped): %s" %
                        self.sample_data_filename)
            return None

        gen_samples = datadesc.GenerationSamples()
        gen_samples.game = self.conf.game
        gen_samples.date_created = get_date_string()

        generation_name = self.get_generation_name(self.conf.current_step)
        gen_samples.with_generation = generation_name

        # only save the minimal number for this run
        sample_count = min(len(self.accumulated_samples),
                           self.conf.num_samples_to_train)
        gen_samples.num_samples = sample_count
        gen_samples.samples = self.accumulated_samples[:gen_samples.num_samples]

        # write json file (floats are represented with 5 decimals)
        json.encoder.FLOAT_REPR = lambda f: ("%.5f" % f)

        log.info("writing json (gzipped): %s" % self.sample_data_filename)
        with gzip.open(self.sample_data_filename, 'w') as out_file:
            payload = attrutil.attr_to_json(gen_samples, pretty=False)
            out_file.write(payload)

        return gen_samples
Example #7
0
def load_module(kif_filename):
    ''' attempts to load a python module with the same filename.  If it does not exist, will run
        java and use ggp-base to create the module.

        The conversion commands are tried in order, and the import is retried after each one.
        If the module still cannot be imported after the last command, an error is logged and
        the ImportError is propagated to the caller. '''

    basename, props_file = kif_filename_to_propfile(kif_filename)
    module_name = "ggplib.props." + basename

    convert_cmds = [
        "java -XX:+UseSerialGC -Xmx8G propnet_convert.Convert %s %s" %
        (kif_filename, props_file),
        "java propnet_convert.Convert %s %s" % (kif_filename, props_file),
    ]

    # importlib.invalidate_caches() only exists on python 3.  It is needed there so that a
    # module file created after the interpreter started is noticed by the import system.
    invalidate_caches = getattr(importlib, "invalidate_caches", None)

    for cmd in convert_cmds:
        try:
            # rather unsafe cache, if kif file changes underneath our feet - tough luck.
            return importlib.import_module(module_name)
        except ImportError:
            # run java ggp-base to create a propnet.  The resultant propnet will be in
            # props_dir, which can be imported.
            # NOTE(review): the command goes through a shell (shell=True), so filenames with
            # shell metacharacters would presumably be interpreted - confirm callers only pass
            # trusted paths.
            log.debug("Running: %s" % cmd)
            return_code, out, err = run(cmd, shell=True, timeout=60)
            if return_code != 0:
                log.warning("Error code: %s" % err)
            else:
                for line in out.splitlines():
                    log.info("... %s" % line)

            if invalidate_caches is not None:
                invalidate_caches()

    # last attempt after every conversion command was tried.  Previously the marker text below
    # was itself run as a shell command before the ImportError was re-raised.
    try:
        return importlib.import_module(module_name)
    except ImportError:
        log.error("SOMETHING IS BROKEN in install ...")
        raise
Example #8
0
    def on_request_samples(self, server, msg):
        """Generate samples via the supervisor and send them back to ``server``.

        The new states in ``msg`` are decoded and registered as unique states
        before polling.  A summary line with counts and timings is logged.
        """
        self.on_request_samples_time = time.time()

        assert self.supervisor is not None
        self.samples = []
        self.supervisor.reset_stats()

        log.debug("Got request for sample with number unique states %s" %
                  len(msg.new_states))

        # update duplicates
        for encoded_state in msg.new_states:
            self.supervisor.add_unique_state(decode_state(encoded_state))

        start_time = time.time()
        self.supervisor.poll_loop(do_stats=True, cb=self.cb_from_superviser)

        summary_fmt = "#samp %d, pred()s %d/%d, py/pred/all %.1f/%.1f/%.1f"
        summary_args = (len(self.samples),
                        self.supervisor.num_predictions_calls,
                        self.supervisor.total_predictions,
                        self.supervisor.acc_time_polling,
                        self.supervisor.acc_time_prediction,
                        time.time() - start_time)
        log.info(summary_fmt % summary_args)

        response = msgs.RequestSampleResponse(self.samples, 0)
        server.send_msg(response)
Example #9
0
    def update_value_weighting(self, value_weight, force_compile=False):
        ''' dynamic value weighting.  The weight is scaled according to the controller's
        value_loss_diff (taken as an approximation of the value head overfitting), clamped to
        the range [0.05, 1.0], and the network is recompiled when the weight moved by more than
        0.0001 or force_compile is set.  Returns the resulting weight. '''

        # add these as hyper parameters?  Not sure if ever want to change them.  This function
        # should be a hyperparmeter.
        reduction = 0.333
        weight_floor = 0.05

        loss_diff = self.controller.value_loss_diff
        log.info("controller.value_loss_diff %.3f" % loss_diff)

        orig_weight = value_weight
        if loss_diff > 0.004:
            value_weight = value_weight * reduction
        elif loss_diff > 0.001:
            value_weight = value_weight * (reduction * 2)
        elif loss_diff < 0:
            # increase it again???
            value_weight = value_weight / reduction
        elif orig_weight < 0.5 and loss_diff < 0.002:
            value_weight = value_weight / (reduction * 2)

        # clamp: first the lower bound, then the upper bound
        value_weight = max(weight_floor, value_weight)
        value_weight = min(value_weight, 1.0)

        if force_compile or abs(value_weight - orig_weight) > 0.0001:
            self.compile_nn(value_weight)

        return value_weight
Example #10
0
    def handle_start(self, symbols):
        """Handle a start message made of six symbols.

        Returns "busy" when a match is already being played or when starting
        the match raises match.BadGame, and "ready" once the new match has
        been started.
        """
        assert len(symbols) == 6
        match_id = symbols[1]
        role = symbols[2]
        gdl = symbols[3]
        meta_time = int(symbols[4])
        move_time = int(symbols[5])

        # refuse a second match while one is being played
        if self.current_match is not None:
            log.debug("GOT A START message for %s while already playing match" % match_id)
            return "busy"

        log.info("Starting new match %s" % match_id)

        # lookup game and create match
        gdl_symbol_mapping, game_info = lookup.by_gdl(gdl)
        new_match = match.Match(game_info, match_id, role, meta_time,
                                move_time, self.player,
                                cushion_time=CUSHION_TIME,
                                gdl_symbol_mapping=gdl_symbol_mapping)
        self.current_match = new_match

        try:
            # start gameserver timeout
            self.update_gameserver_timeout(self.current_match.meta_time)

            self.current_match.do_start()
        except match.BadGame:
            return "busy"

        return "ready"
Example #11
0
    def cleanup(self, keep_sm=False):
        """Clean up the player and deallocate the c++ objects held by the match.

        A failure while cleaning up the player is logged (with traceback) and
        does not stop the remaining cleanup.  All stored base states and the
        joint move are deallocated.  The state machine is deallocated too,
        unless ``keep_sm`` is true.
        """
        try:
            self.player.cleanup()
            if self.verbose:
                log.verbose("done cleanup player: %s" % self.player)
        except Exception as exc:
            # best effort: report and carry on with the c++ cleanup below
            log.error("FAILED TO CLEANUP PLAYER: %s" % exc)
            log.error(traceback.format_exc())

        # cleanup c++ stuff
        if self.verbose:
            log.warning("cleaning up c++ stuff")

        # all the basestates
        for bs in self.states:
            # cleanup bs
            interface.dealloc_basestate(bs)

        self.states = []

        if self.joint_move:
            interface.dealloc_jointmove(self.joint_move)
            self.joint_move = None

        if self.sm and not keep_sm:
            interface.dealloc_statemachine(self.sm)
            self.sm = None

        if self.verbose:
            log.info("match - done cleaning up")
Example #12
0
    def on_request_samples(self, server, msg):
        """Generate samples via the supervisor and send them back to ``server``.

        The new states in ``msg`` are base64 decoded and registered as unique
        states before polling.  A summary line and the average number of
        predictions per second are logged.
        """
        self.on_request_samples_time = time.time()

        assert self.supervisor is not None
        self.samples = []
        self.supervisor.reset_stats()

        log.debug("Got request for sample with number unique states %s" % len(msg.new_states))

        # update duplicates
        for s in msg.new_states:
            # note we decode the string and set it rawly.  using decode_state() was too slow.
            # b64decode() is used since base64.decodestring() was removed in python 3.9.
            self.supervisor.add_unique_state(base64.b64decode(s))

        start_time = time.time()
        self.supervisor.poll_loop(do_stats=True, cb=self.cb_from_superviser)

        summary_fmt = "#samp %d, pred()s %d/%d, py/pred/all %.1f/%.1f/%.1f"
        time_since_last = time.time() - start_time
        log.info(summary_fmt % (len(self.samples),
                                self.supervisor.num_predictions_calls,
                                self.supervisor.total_predictions,
                                self.supervisor.acc_time_polling,
                                self.supervisor.acc_time_prediction,
                                time_since_last))

        # guard against a zero elapsed time (coarse clocks) instead of dividing by zero
        if time_since_last > 0:
            predicts_per_sec = self.supervisor.total_predictions / time_since_last
        else:
            predicts_per_sec = 0.0
        log.info("Average pred p/s %.1f" % predicts_per_sec)

        m = msgs.RequestSampleResponse(self.samples, 0)
        server.send_msg(m)
Example #13
0
    def __init__(self, gdl_str, verbose=False, fast_reset=False):
        """Create a game master for the game described by ``gdl_str``.

        The game is looked up by its gdl, a state machine is taken from the
        result and the joint move and base state that are reused internally
        are allocated.  ``scores`` is only updated after a game is finished.
        """
        self.verbose = verbose
        self.fast_reset = fast_reset

        # the symbol factory is used to convert to base state
        self.symbol_factory = SymbolFactory()

        self.gdl_str = gdl_str
        _, game_info = lookup.by_gdl(gdl_str)
        self.sm = game_info.get_sm()
        self.game = game_info.game
        self.match_id = None

        # a joint move / basestate are stored internally
        self.joint_move = self.sm.get_joint_move()
        self.next_basestate = self.sm.new_base_state()

        # first symbol of the gdl of every base
        base_tuples = []
        for base_index in range(self.next_basestate.len()):
            symbols = self.symbol_factory.to_symbols(self.sm.get_gdl(base_index))
            base_tuples.append(tuple(symbols)[0])
        self.bases = base_tuples

        self.players = []
        self.players_map = {}

        # updated after game is finished
        self.scores = {}

        if verbose:
            log.info("GAMEMASTER: create a gamemaster for game %s" % self.game)
        self.matches = None
Example #14
0
    def checkpoint(self):
        """Save accumulated samples, request training when due, and reschedule.

        With more samples than ``conf.num_samples_to_train`` the saved data
        becomes the pending generation samples (if none are pending) and a
        training request is sent unless training is already in progress.  The
        method always re-arms itself via the reactor.
        """
        num_samples = len(self.accumulated_samples)
        log.verbose("entering checkpoint with %s sample accumulated" %
                    num_samples)

        if num_samples > 0:
            gen_samples = self.save_sample_data()
            have_enough = num_samples > self.conf.num_samples_to_train

            if have_enough and self.pending_gen_samples is None:
                next_name = self.get_generation_name(self.conf.current_step + 1)
                log.info("data done for: %s" % next_name)
                self.pending_gen_samples = gen_samples

            if have_enough and not self.training_in_progress:
                if self.the_nn_trainer is not None:
                    self.send_request_to_train_nn()
                else:
                    log.error("There is no trainer - please start")

        # cancel any existing cb
        pending_cb = self.checkpoint_cb
        if pending_cb is not None and pending_cb.active():
            pending_cb.cancel()

        # call checkpoint again in n seconds
        self.checkpoint_cb = reactor.callLater(self.conf.checkpoint_interval,
                                               self.checkpoint)
Example #15
0
    def send_request_to_train_nn(self):
        """Send a RequestNetworkTrain message for the next step to the trainer.

        Must not be called while training is in progress.  Afterwards
        ``training_in_progress`` is set.
        """
        assert not self.training_in_progress
        next_step = self.conf.current_step + 1

        log.verbose("send_request_to_train_nn() @ step %s" % next_step)

        # the base training config has to agree with our own config
        train_conf = self.conf.base_training_config
        assert train_conf.game == self.conf.game
        assert train_conf.generation_prefix == self.conf.generation_prefix

        train_conf.next_step = next_step

        request = msgs.RequestNetworkTrain()
        request.game = self.conf.game
        request.train_conf = train_conf

        request.network_model = self.conf.base_network_model
        request.generation_description = self.conf.base_generation_description

        # send out message to train
        trainer_worker = self.the_nn_trainer.worker
        trainer_worker.send_msg(request)

        log.info("sent out request to the_nn_trainer!")

        self.training_in_progress = True
Example #16
0
    def configure_self_play(self):
        """Create and start the self play supervisor, or refresh an existing one.

        On the first call the network is loaded (if not already present) and
        a supervisor is created and started.  On later calls the process
        exits when ``conf.exit_on_update_config`` is set; otherwise the
        network is pushed to the supervisor when the generation number is a
        multiple of ``conf.replace_network_every_n_gens``, and the unique
        states are cleared.
        """
        assert self.self_play_conf is not None

        if self.nn is None:
            manager = get_manager()
            self.nn = manager.load_network(self.game_info.game,
                                           self.latest_generation_name)

        if self.supervisor is None:
            self.supervisor = cppinterface.Supervisor(
                self.sm,
                self.nn,
                batch_size=self.conf.self_play_batch_size,
                sleep_between_poll=self.conf.sleep_between_poll)

            self.supervisor.start_self_play(self.self_play_conf,
                                            self.conf.num_workers)
            return

        # force exit of the worker if there was an update to the config
        if self.conf.exit_on_update_config:
            os._exit(0)

        log.info("Latest generation: %s" % self.latest_generation_name)

        # the generation number is whatever follows the last underscore
        gen = int(self.latest_generation_name.rsplit("_", 1)[-1])
        if gen % self.conf.replace_network_every_n_gens == 0:
            log.warning("Updating network to: %s" % gen)
            self.supervisor.update_nn(self.nn)

        self.supervisor.clear_unique_states()
Example #17
0
    def on_epoch_begin(self, epoch, logs=None):
        """Count the epoch; when retraining without stored weights, store the
        model's current weights as ``retrain_best`` first."""
        if self.retrain_best is None:
            if self.retraining:
                log.info(
                    'Reusing old retraining network for *next* retraining network')
                self.retrain_best = self.model.get_weights()

        self.at_epoch = self.at_epoch + 1
Example #18
0
    def on_worker_config(self, worker, msg):
        """Register the configuration a worker sent and assign its role(s).

        A worker may be a trainer, a self player, or both.  A trainer becomes
        ``the_nn_trainer``.  A self player is reset, added to the free
        players and player scheduling is queued on the reactor.

        Raises:
            Exception: if the worker is neither trainer nor self player, if a
                trainer is already set, or if a self play worker has a
                ``self_play_batch_size`` below 1.
        """
        info = self.workers[worker]

        # can be both
        if not (msg.conf.do_training or msg.conf.do_self_play):
            raise Exception(
                "worker not configured properly (neither self play or trainer)")

        info.conf = msg.conf
        if info.conf.do_training:
            # protection against > 1 the_nn_trainer
            if self.the_nn_trainer is not None:
                raise Exception("the_nn_trainer already set")

            log.info("worker trainer set %s" % worker)
            self.the_nn_trainer = info

        if info.conf.do_self_play:
            if info.conf.self_play_batch_size < 1:
                # report the value that was actually checked
                raise Exception("self play and self_play_batch_size < 1 (%d)" %
                                info.conf.self_play_batch_size)

            info.reset()
            self.free_players.append(info)

            log.info("worker added as self play %s" % worker)

            # configure player will happen in schedule_players
            reactor.callLater(0, self.schedule_players)
Example #19
0
    def save(self):
        """Save the network as the next generation and, when the controller
        holds retraining weights, a second network named
        "<next generation>_prev" with those weights.  Callbacks are run after
        each save."""
        # XXX set generation attributes

        manager = get_manager()

        manager.save_network(self.nn, generation_name=self.next_generation)
        self.do_callbacks()

        ###############################################################################
        # save a previous model for next time
        if self.controller.retrain_best is None:
            log.warning("No retraining network")
            return

        log.info("Saving retraining network with val_policy_acc: %.4f" %
                 (self.controller.retrain_best_val_policy_acc))

        # there is an undocumented keras clone function, but this is sure to work (albeit slow and evil)
        from ggpzero.util.keras import keras_models

        prev_name = "%s_prev" % self.next_generation

        # rebuild the architecture from json, then load the retraining weights
        architecture_json = self.nn.keras_model.to_json()
        prev_model = keras_models.model_from_json(architecture_json)
        prev_model.set_weights(self.controller.retrain_best)

        prev_descr = attrutil.clone(self.nn.generation_descr)
        prev_descr.name = prev_name

        prev_nn = network.NeuralNetwork(self.nn.gdl_bases_transformer,
                                        prev_model, prev_descr)
        manager.save_network(prev_nn, prev_name)
        self.do_callbacks()
Example #20
0
 def debug(self):
     """Log a few example rows of the training inputs and outputs.

     The sample indices are fixed.  Indices past the end of the inputs are
     skipped, so a small data set no longer raises IndexError.
     """
     num_rows = len(self.inputs)

     # good to see some outputs
     for x in (10, 420, 42):
         if x >= num_rows:
             continue

         log.info('train input, shape: %s.  Example: %s' %
                  (self.inputs.shape, self.inputs[x]))
         for o in self.outputs:
             log.info('train output, shape: %s.  Example: %s' %
                      (o.shape, o[x]))
Example #21
0
    def on_meta_gaming(self, finish_time):
        """Duplicate the match's state machine, create the proxy player and
        forward the meta gaming call to it."""
        announcement = "%s meta Gaming: match: %s" % (self.name, self.match.match_id)
        log.info(announcement)

        # work on our own copy of the state machine
        self.sm = self.match.sm.dupe()

        self.proxy = self.meta_create_player()

        # ensure we are in the right state
        current_state = self.match.get_current_state()
        self.sm.update_bases(current_state)

        self.proxy.on_meta_gaming(finish_time)
Example #22
0
 def init_data_rxd(self, data):
     """Collect challenge response data and resolve it once complete.

     Nothing happens until exactly CHALLENGE_SIZE bytes are buffered.  Then
     the buffer is compared to the expected response: on a match the broker
     is told about the new client, otherwise the connection is dropped.
     """
     self.start_buf += data
     if len(self.start_buf) != self.CHALLENGE_SIZE:
         return

     response_ok = self.expected_response == self.start_buf

     # NOTE(review): the flag is set on failure as well - confirm this is
     # intended.
     self.logical_connection = True

     if response_ok:
         log.info("Logical connection made")
         self.broker.new_broker_client(self)
     else:
         log.error("Logical connection failed")
         self.disconnect()
Example #23
0
    def on_meta_gaming(self, finish_time):
        """Duplicate the match's state machine and allocate the objects that
        are cached for the rest of the match."""
        announcement = "%s meta Gaming: match: %s" % (self.name, self.match.match_id)
        log.info(announcement)

        self.sm = self.match.sm.dupe()
        sm = self.sm

        # get and cache fast move and legals
        self.joint_move = sm.get_joint_move()
        self.depth_charge_joint_move = sm.get_joint_move()
        self.depth_charge_state = sm.new_base_state()

        roles = sm.get_roles()
        self.role_count = len(roles)

        # store the node so we can return info on move
        self.root = None
Example #24
0
def play_runner(player, port):
    """Serve ``player`` through a GGPServer on TCP ``port`` and run the reactor."""
    player_name = player.get_name()
    interface.initialise_k273(1, log_name_base=player_name)
    log.initialise()

    ggp_server = GGPServer()
    ggp_server.set_player(player)
    site = server.Site(ggp_server)

    log.info("Running player '%s' on port %d" % (player.get_name(), port))

    reactor.listenTCP(port, site)
    reactor.run()
Example #25
0
def main_2(game_name, seconds_to_run):
    """Run the performance test on ``game_name`` and log a summary."""
    sm = lookup.by_name(game_name).get_sm()

    msecs_taken, rollouts, num_state_changes = go(sm, seconds_to_run)
    seconds_taken = msecs_taken / 1000.0

    banner = "===================================================="
    log.info(banner)
    log.info("performance test game %s" % game_name)
    log.info("ran for %.3f seconds, state changes %s, rollouts %s" %
             (seconds_taken, num_state_changes, rollouts))
    log.info("rollouts per second: %s" % (rollouts / seconds_taken))
    log.info(banner)
Example #26
0
    def on_epoch_begin(self, epoch, logs=None):
        """Count and log the epoch, then set up a progress bar for it."""
        self.at_epoch += 1
        log.info('Epoch %d/%d' % (self.at_epoch, self.num_epochs))

        # oh man, keras consistency... XXX
        # 'samples' is used when present, otherwise 'steps' * 512
        try:
            target = self.params['samples']
        except KeyError:
            target = self.params['steps'] * 512
        self.target = target

        self.progbar = Progbar(target=self.target)
        self.seen = 0
Example #27
0
    def on_sample_response(self, worker, msg):
        """Store the samples a worker sent, mark the worker as free again and
        queue player scheduling."""
        worker_info = self.workers[worker]

        if msg.samples:
            self.add_new_samples(msg.samples)

            duplicates = msg.duplicates_seen
            if duplicates:
                log.info("worker saw %s duplicates" % duplicates)

            total = len(self.accumulated_samples)
            log.info("len accumulated_samples: %s" % total)

        self.free_players.append(worker_info)
        reactor.callLater(0, self.schedule_players)
Example #28
0
def main(args):
    """Run the web server; ``args`` is (port, path to viewer)."""
    port = int(args[0])
    path_to_viewer = args[1]

    interface.initialise_k273(1, log_name_base="web")
    log.initialise()

    site = server.Site(WebServer(path_to_viewer))

    log.info("Running WebServer on port %d" % port)

    reactor.listenTCP(port, site)
    reactor.run()
Example #29
0
    def do_play(self, move):
        """Apply ``move`` (if any), then ask the player for its next move.

        Returns "done" when the resulting state is terminal, otherwise the
        player's choice remapped to a gamemaster move.

        Raises:
            CriticalError: if the remapped choice is not among the remapped
                legal moves of our role.
        """
        enter_time = time.time()
        if self.verbose:
            log.debug("do_play: %s" % (move, ))

        if move is not None:
            self.apply_move(move)

        current_state = self.get_current_state()
        if self.verbose:
            state_str = self.game_info.model.basestate_to_str(current_state)
            log.info("Current state : '%s'" % state_str)

        self.sm.update_bases(current_state)
        if self.sm.is_terminal():
            return "done"

        # deadline for the player, minus the cushion if there is one
        deadline = enter_time + self.move_time
        if self.cushion_time > 0:
            deadline = deadline - self.cushion_time

        legal_choice = self.player.on_next_move(deadline)

        # we have no idea what on_next_move() left the state machine.  So reverting it back to
        # correct state here.
        self.sm.update_bases(self.get_current_state())

        # legal state of our role, needed to check the choice is valid
        legal_state = self.sm.get_legal_state(self.our_role_index)

        # store last move (in our own mapping, *not* gamemaster)
        self.last_played_move = self.sm.legal_to_move(self.our_role_index,
                                                      legal_choice)

        # check the move remaps and is a legal choice
        chosen_move = self.legal_to_gamemaster_move(legal_choice)

        legal_moves = []
        for index in range(legal_state.get_count()):
            legal_moves.append(
                self.legal_to_gamemaster_move(legal_state.get_legal(index)))

        if chosen_move not in legal_moves:
            msg = "Choice was %s not in legal choices %s" % (chosen_move, legal_moves)
            log.critical(msg)
            raise CriticalError(msg)

        if self.verbose:
            log.info("(%s) do_play '%s' sending move: %s" %
                     (self.player.name, self.role, chosen_move))
        return chosen_move
Example #30
0
    def on_sample_response(self, worker, msg):
        """Store the samples a worker sent (reporting dropped duplicates),
        mark the worker as free again and queue player scheduling."""
        worker_info = self.workers[worker]

        if len(msg.samples) > 0:
            dropped = self.add_new_samples(msg.samples)
            if dropped:
                log.warning("dropping %s inflight duplicate state(s)" %
                            dropped)

            seen_by_worker = msg.duplicates_seen
            if seen_by_worker:
                log.info("worker saw %s duplicates" % seen_by_worker)

            total = len(self.accumulated_samples)
            log.info("len accumulated_samples: %s" % total)

        self.free_players.append(worker_info)
        reactor.callLater(0, self.schedule_players)