def __init__(self, game_info, verbose=False, fast_reset=False):
    """Create a gamemaster for a pre-looked-up game.

    game_info: lookup result providing the statemachine, model and game name.
    verbose: if True, log creation and progress.
    fast_reset: stored for later use when converting to base state.
    """
    self.game_info = game_info
    self.verbose = verbose
    self.fast_reset = fast_reset

    # used to convert to base state
    self.symbol_factory = SymbolFactory()

    self.sm = self.game_info.get_sm()
    self.game = self.game_info.game

    self.match_id = None

    # store a joint move / basestate internally
    self.joint_move = self.sm.get_joint_move()
    self.next_basestate = self.sm.new_base_state()

    # XXX we really shouldn't need to do this... why not just use model??? XXX
    def get_base_tuple(i):
        # parse the model's base GDL into symbols; take the first symbol tuple
        return tuple(self.symbol_factory.to_symbols(game_info.model.bases[i]))[0]
    self.bases = [get_base_tuple(i) for i in range(self.next_basestate.len())]

    self.players = []
    self.players_map = {}

    # updated after game is finished
    self.scores = {}

    if verbose:
        log.info("GAMEMASTER: create a gamemaster for game %s" % self.game)

    self.matches = None
def on_meta_gaming(self, finish_time):
    """Meta-gaming hook: lazily (re)load the network/poller, then reset it for this match.

    The statemachine/network are rebuilt when not yet created, or whenever the
    configured generation contains "*" (meaning: always use the most recent one).
    """
    if self.conf.verbose:
        log.info("PUCTPlayer, match id: %s" % self.match.match_id)

    if self.sm is None or "*" in self.conf.generation:
        if "*" in self.conf.generation:
            log.warning("Using recent generation %s" % self.conf.generation)

        game_info = self.match.game_info
        self.sm = game_info.get_sm()

        man = get_manager()
        gen = self.conf.generation

        self.nn = man.load_network(game_info.game, gen)
        self.poller = PlayPoller(self.sm, self.nn, attr.asdict(self.conf))

        def get_noop_idx(actions):
            # find the index of the "noop" action for a role
            for idx, a in enumerate(actions):
                if "noop" in a:
                    return idx
            assert False, "did not find noop"

        # cache each role's noop legal index (assumes a two role game)
        self.role0_noop_legal, self.role1_noop_legal = map(get_noop_idx, game_info.model.actions)

    # always reset the poller to the match's current depth
    self.poller.player_reset(self.match.game_depth)
def on_next_move(self, finish_time):
    """Choose our move for the current state using the MCTS poller.

    finish_time: wall-clock deadline passed through to the poller.
    Returns the chosen move (in statemachine legal terms).
    """
    log.info("PUCTPlayer.on_next_move(), %s" % self.get_name())

    current_state = self.match.get_current_state()
    self.sm.update_bases(current_state)

    # the "lead" role is the one with a real decision this turn; a role whose
    # single legal is its noop is not leading (alternating-move games)
    if (self.sm.get_legal_state(0).get_count() == 1 and
        self.sm.get_legal_state(0).get_legal(0) == self.role0_noop_legal):
        lead_role_index = 1
    else:
        assert (self.sm.get_legal_state(1).get_count() == 1 and
                self.sm.get_legal_state(1).get_legal(0) == self.role1_noop_legal)
        lead_role_index = 0

    # spend the full iteration budget only when it is actually our decision
    if lead_role_index == self.match.our_role_index:
        max_iterations = self.conf.playouts_per_iteration
    else:
        max_iterations = self.conf.playouts_per_iteration_noop

    # fix: current_state was redundantly re-fetched here; the match state has
    # not changed since the fetch above, so reuse it
    self.poller.player_move(basestate_to_ptr(current_state), max_iterations, finish_time)
    self.poller.poll_loop()

    move, prob, node_count = self.poller.player_get_move(self.match.our_role_index)

    # stash per-move diagnostics for callers/loggers
    self.last_probability = prob
    self.last_node_count = node_count
    return move
def test_with_database():
    """lookup.by_gdl() on a known game: returns the cached info object but a fresh
    statemachine per call, all agreeing on the initial state."""
    gdl_str = get_gdl_for_game("connectFour")
    mapping, info = lookup.by_gdl(gdl_str)
    assert mapping is None
    assert info.game == "connectFour"
    sm = info.get_sm()

    # ensure keeps returning valid statemachines
    for ii in range(10):
        new_mapping, new_info = lookup.by_gdl(gdl_str)
        new_sm = new_info.get_sm()

        assert new_mapping is None
        # same cached info object, but a distinct statemachine instance
        assert new_info is info
        assert new_sm != sm
        assert id(new_sm) != id(sm)
        assert new_sm.get_initial_state() == sm.get_initial_state()
        interface.dealloc_statemachine(new_sm)

    # finally run rollouts in c++ on the original sm
    log.info("Testing sm %s" % sm)
    msecs_taken, rollouts, _ = interface.depth_charge(sm, 1)
    rollouts_per_second = (rollouts / float(msecs_taken)) * 1000
    log.info("c++ rollouts per second %.2f" % rollouts_per_second)
def test_not_in_database():
    """A GDL string not in the database still builds a working statemachine;
    the game name falls back to 'unknown'."""
    some_simple_game = """
    (role white) (role black)
    (init o1)
    (legal white a) (legal white b)
    (legal black a)
    (<= (next o2) (does white a) (true o1))
    (<= (next o3) (does white b) (true o1))
    (<= (goal white 0) (true o1))
    (<= (goal white 10) (true o2))
    (<= (goal white 90) (true o3))
    (<= (goal black 0) (true o1))
    (<= (goal black 90) (true o2))
    (<= (goal black 10) (true o3))
    (<= terminal (true o2))
    (<= terminal (true o3))
    """

    mapping, info = lookup.by_gdl(some_simple_game)
    # unknown games get no symbol mapping and the placeholder name
    assert info.game == "unknown"
    sm = info.get_sm()

    # run rollouts in c++
    msecs_taken, rollouts, _ = interface.depth_charge(sm, 1)
    rollouts_per_second = (rollouts / float(msecs_taken)) * 1000
    log.info("c++ rollouts per second %.2f" % rollouts_per_second)
def save_sample_data(self):
    """Persist the accumulated samples as gzipped json.

    Returns the GenerationSamples object written, or None when training is in
    progress (the write is skipped to avoid racing the trainer).
    """
    if self.training_in_progress:
        log.warning("skip writing json (gzipped): %s" % self.sample_data_filename)
        return

    gen_samples = datadesc.GenerationSamples()
    gen_samples.game = self.conf.game
    gen_samples.date_created = get_date_string()
    gen_samples.with_generation = self.get_generation_name(self.conf.current_step)

    # only save the minimal number for this run
    gen_samples.num_samples = min(len(self.accumulated_samples), self.conf.num_samples_to_train)
    gen_samples.samples = self.accumulated_samples[:gen_samples.num_samples]

    # write json file
    # NOTE(review): this mutates process-wide json float formatting (and is a
    # no-op with python3's C encoder) — confirm this side effect is intended
    json.encoder.FLOAT_REPR = lambda f: ("%.5f" % f)
    log.info("writing json (gzipped): %s" % self.sample_data_filename)
    with gzip.open(self.sample_data_filename, 'w') as f:
        f.write(attrutil.attr_to_json(gen_samples, pretty=False))

    return gen_samples
def load_module(kif_filename):
    ''' attempts to load a python module with the same filename.  If it does not exist, will run
        java and use ggp-base to create the module. '''
    basename, props_file = kif_filename_to_propfile(kif_filename)

    # try the import; on ImportError run the conversion command, then loop to
    # retry the import with the next entry.  The final sentinel "command" never
    # runs — reaching it just re-raises the ImportError after both java
    # invocations have failed.
    for cmd in ["java -XX:+UseSerialGC -Xmx8G propnet_convert.Convert %s %s" % (kif_filename, props_file),
                "java propnet_convert.Convert %s %s" % (kif_filename, props_file),
                "SOMETHING IS BROKEN in install ..."]:
        try:
            # rather unsafe cache, if kif file changes underneath our feet - tough luck.
            module = importlib.import_module("ggplib.props." + basename)
            break

        except ImportError:
            # run java ggp-base to create a propnet.  The resultant propnet will be in props_dir,
            # which can be imported.
            log.debug("Running: %s" % cmd)
            return_code, out, err = run(cmd, shell=True, timeout=60)

            if return_code != 0:
                log.warning("Error code: %s" % err)
            else:
                for l in out.splitlines():
                    log.info("... %s" % l)

            if "SOMETHING" in cmd:
                raise

    return module
def on_request_samples(self, server, msg):
    """Handle a sample request: register the gamemaster's unique states, run the
    self-play poll loop, then reply with the collected samples."""
    self.on_request_samples_time = time.time()

    assert self.supervisor is not None
    self.samples = []
    self.supervisor.reset_stats()

    log.debug("Got request for sample with number unique states %s" % len(msg.new_states))

    # update duplicates
    for s in msg.new_states:
        self.supervisor.add_unique_state(decode_state(s))

    start_time = time.time()
    self.supervisor.poll_loop(do_stats=True, cb=self.cb_from_superviser)

    # NOTE(review): rebinds the 'msg' parameter as the stats format string
    msg = "#samp %d, pred()s %d/%d, py/pred/all %.1f/%.1f/%.1f"
    log.info(msg % (len(self.samples),
                    self.supervisor.num_predictions_calls,
                    self.supervisor.total_predictions,
                    self.supervisor.acc_time_polling,
                    self.supervisor.acc_time_prediction,
                    time.time() - start_time))

    m = msgs.RequestSampleResponse(self.samples, 0)
    server.send_msg(m)
def update_value_weighting(self, value_weight, force_compile=False): ''' dynamic value weighting. Based off of the value loss, as an approximated of overfitting value head ''' # add these as hyper parameters? Not sure if ever want to change them. This function # should be a hyperparmeter. value_weight_reduction = 0.333 value_weight_min = 0.05 log.info("controller.value_loss_diff %.3f" % self.controller.value_loss_diff) orig_weight = value_weight if self.controller.value_loss_diff > 0.004: value_weight *= value_weight_reduction elif self.controller.value_loss_diff > 0.001: value_weight *= (value_weight_reduction * 2) else: # increase it again??? if self.controller.value_loss_diff < 0: value_weight /= value_weight_reduction elif orig_weight < 0.5 and self.controller.value_loss_diff < 0.002: value_weight /= (value_weight_reduction * 2) value_weight = min(max(value_weight_min, value_weight), 1.0) if force_compile or abs(value_weight - orig_weight) > 0.0001: self.compile_nn(value_weight) return value_weight
def handle_start(self, symbols):
    """Handle a GGP 'start' message: (start <match_id> <role> <gdl> <meta_time> <move_time>).

    Returns "ready" on success, "busy" if we are already playing a match or the
    game fails to start.
    """
    assert len(symbols) == 6
    match_id = symbols[1]
    role = symbols[2]
    gdl = symbols[3]
    meta_time = int(symbols[4])
    move_time = int(symbols[5])

    if self.current_match is not None:
        log.debug("GOT A START message for %s while already playing match" % match_id)
        return "busy"
    else:
        log.info("Starting new match %s" % match_id)

        # lookup game and create match
        gdl_symbol_mapping, game_info = lookup.by_gdl(gdl)
        self.current_match = match.Match(game_info, match_id, role, meta_time, move_time,
                                         self.player, cushion_time=CUSHION_TIME,
                                         gdl_symbol_mapping=gdl_symbol_mapping)
        try:
            # start gameserver timeout
            self.update_gameserver_timeout(self.current_match.meta_time)
            self.current_match.do_start()
            return "ready"

        except match.BadGame:
            return "busy"
def cleanup(self, keep_sm=False):
    """Clean up the player and deallocate c++ resources (basestates, joint move,
    and — unless keep_sm — the statemachine).

    keep_sm: when True, leave self.sm alive for reuse.
    """
    try:
        self.player.cleanup()
        if self.verbose:
            log.verbose("done cleanup player: %s" % self.player)

    except Exception as exc:
        log.error("FAILED TO CLEANUP PLAYER: %s" % exc)
        # fix: removed unused `type, value, tb = sys.exc_info()` (it shadowed
        # the builtin `type` and was never read); format_exc() logs the trace
        log.error(traceback.format_exc())

    # cleanup c++ stuff
    if self.verbose:
        log.warning("cleaning up c++ stuff")

    # all the basestates
    for bs in self.states:
        # cleanup bs
        interface.dealloc_basestate(bs)
    self.states = []

    if self.joint_move:
        interface.dealloc_jointmove(self.joint_move)
        self.joint_move = None

    if self.sm and not keep_sm:
        interface.dealloc_statemachine(self.sm)
        self.sm = None

    if self.verbose:
        log.info("match - done cleaning up")
def on_request_samples(self, server, msg):
    """Handle a sample request: register unique states (raw base64 decode for
    speed), run the self-play poll loop, log stats, reply with samples."""
    self.on_request_samples_time = time.time()

    assert self.supervisor is not None
    self.samples = []
    self.supervisor.reset_stats()

    log.debug("Got request for sample with number unique states %s" % len(msg.new_states))

    # update duplicates
    for s in msg.new_states:
        # note we decode the string and set it rawly.  using decode_state() was too slow.
        # NOTE(review): base64.decodestring is deprecated (removed in py3.9);
        # base64.decodebytes is the modern replacement
        self.supervisor.add_unique_state(base64.decodestring(s))

    start_time = time.time()
    self.supervisor.poll_loop(do_stats=True, cb=self.cb_from_superviser)

    # NOTE(review): rebinds the 'msg' parameter as the stats format string
    msg = "#samp %d, pred()s %d/%d, py/pred/all %.1f/%.1f/%.1f"
    time_since_last = time.time() - start_time
    log.info(msg % (len(self.samples),
                    self.supervisor.num_predictions_calls,
                    self.supervisor.total_predictions,
                    self.supervisor.acc_time_polling,
                    self.supervisor.acc_time_prediction,
                    time_since_last))

    predicts_per_sec = self.supervisor.total_predictions / time_since_last
    log.info("Average pred p/s %.1f" % predicts_per_sec)

    m = msgs.RequestSampleResponse(self.samples, 0)
    server.send_msg(m)
def __init__(self, gdl_str, verbose=False, fast_reset=False):
    """Create a gamemaster directly from a GDL string (looked up via lookup.by_gdl).

    gdl_str: game description in GDL.
    verbose: if True, log creation and progress.
    fast_reset: stored for later use when converting to base state.
    """
    self.verbose = verbose
    self.fast_reset = fast_reset

    # used to convert to base state
    self.symbol_factory = SymbolFactory()

    self.gdl_str = gdl_str
    _, info = lookup.by_gdl(gdl_str)
    self.sm = info.get_sm()
    self.game = info.game

    self.match_id = None

    # store a joint move / basestate internally
    self.joint_move = self.sm.get_joint_move()
    self.next_basestate = self.sm.new_base_state()

    def get_base_tuple(i):
        # parse base i's GDL into symbols; take the first symbol tuple
        return tuple(self.symbol_factory.to_symbols(self.sm.get_gdl(i)))[0]
    self.bases = [get_base_tuple(i) for i in range(self.next_basestate.len())]

    self.players = []
    self.players_map = {}

    # updated after game is finished
    self.scores = {}

    if verbose:
        log.info("GAMEMASTER: create a gamemaster for game %s" % self.game)

    self.matches = None
def checkpoint(self):
    """Periodic checkpoint: save accumulated samples, kick off training when we
    have enough, and reschedule itself via the reactor."""
    num_samples = len(self.accumulated_samples)
    log.verbose("entering checkpoint with %s sample accumulated" % num_samples)

    if num_samples > 0:
        gen_samples = self.save_sample_data()

        if num_samples > self.conf.num_samples_to_train:
            # freeze the data for the next generation only once
            if self.pending_gen_samples is None:
                log.info("data done for: %s" % self.get_generation_name(self.conf.current_step + 1))
                self.pending_gen_samples = gen_samples

            if not self.training_in_progress:
                if self.the_nn_trainer is None:
                    log.error("There is no trainer - please start")
                else:
                    self.send_request_to_train_nn()

    # cancel any existing cb
    if self.checkpoint_cb is not None and self.checkpoint_cb.active():
        self.checkpoint_cb.cancel()

    # call checkpoint again in n seconds
    self.checkpoint_cb = reactor.callLater(self.conf.checkpoint_interval, self.checkpoint)
def send_request_to_train_nn(self):
    """Build a RequestNetworkTrain message for the next step and send it to the
    trainer worker; marks training as in progress."""
    assert not self.training_in_progress

    next_step = self.conf.current_step + 1
    log.verbose("send_request_to_train_nn() @ step %s" % next_step)

    # sanity check: the training config must match this run's game/prefix
    train_conf = self.conf.base_training_config
    assert train_conf.game == self.conf.game
    assert train_conf.generation_prefix == self.conf.generation_prefix
    train_conf.next_step = next_step

    m = msgs.RequestNetworkTrain()
    m.game = self.conf.game
    m.train_conf = train_conf
    m.network_model = self.conf.base_network_model
    m.generation_description = self.conf.base_generation_description

    # send out message to train
    self.the_nn_trainer.worker.send_msg(m)
    log.info("sent out request to the_nn_trainer!")
    self.training_in_progress = True
def configure_self_play(self):
    """(Re)configure the self-play supervisor.

    First call: load the network and start self play.  Subsequent calls: either
    exit the worker (if configured to restart on config change) or update the
    supervisor's network every replace_network_every_n_gens generations.
    """
    assert self.self_play_conf is not None

    if self.nn is None:
        self.nn = get_manager().load_network(self.game_info.game, self.latest_generation_name)

    if self.supervisor is None:
        self.supervisor = cppinterface.Supervisor(self.sm, self.nn,
                                                  batch_size=self.conf.self_play_batch_size,
                                                  sleep_between_poll=self.conf.sleep_between_poll)
        self.supervisor.start_self_play(self.self_play_conf, self.conf.num_workers)

    else:
        # force exit of the worker if there was an update to the config
        if self.conf.exit_on_update_config:
            os._exit(0)

        log.info("Latest generation: %s" % self.latest_generation_name)

        # generation names end in "_<number>"
        gen = int(self.latest_generation_name.split("_")[-1])
        if gen % self.conf.replace_network_every_n_gens == 0:
            log.warning("Updating network to: %s" % gen)
            # NOTE(review): self.nn is only loaded when it was None — confirm a
            # newer-generation network is actually (re)loaded before this update
            self.supervisor.update_nn(self.nn)
            self.supervisor.clear_unique_states()
def on_epoch_begin(self, epoch, logs=None):
    """Advance the epoch counter; when retraining, snapshot the model's starting
    weights once so they can seed the *next* retraining cycle."""
    self.at_epoch += 1

    if self.retraining and self.retrain_best is None:
        # first epoch of a retrain run: keep a copy of the initial weights
        log.info('Reusing old retraining network for *next* retraining network')
        self.retrain_best = self.model.get_weights()
def on_worker_config(self, worker, msg):
    """Record a worker's configuration, registering it as the trainer and/or a
    self-play player (a worker may be both), then schedule players.

    Raises Exception on an invalid config: neither role enabled, a second
    trainer, or a self_play_batch_size < 1.
    """
    info = self.workers[worker]

    # can be both
    if not (msg.conf.do_training or msg.conf.do_self_play):
        # renamed local so the 'msg' parameter is not shadowed
        err_msg = "worker not configured properly (neither self play or trainer)"
        raise Exception(err_msg)

    info.conf = msg.conf
    if info.conf.do_training:
        # protection against > 1 the_nn_trainer
        if self.the_nn_trainer is not None:
            raise Exception("the_nn_trainer already set")

        log.info("worker trainer set %s" % worker)
        self.the_nn_trainer = info

    if info.conf.do_self_play:
        if info.conf.self_play_batch_size < 1:
            # fix: was formatted with self.concurrent_plays (not defined on this
            # object) — report the offending batch size instead
            raise Exception("self play and self_play_batch_size < 1 (%d)" %
                            info.conf.self_play_batch_size)

        info.reset()
        self.free_players.append(info)
        log.info("worker added as self play %s" % worker)

    # configure player will happen in schedule_players
    reactor.callLater(0, self.schedule_players)
def save(self):
    """Save the trained network under the next generation name; if a retraining
    snapshot exists, also save it as '<next_generation>_prev' for reuse."""
    # XXX set generation attributes

    man = get_manager()
    man.save_network(self.nn, generation_name=self.next_generation)
    self.do_callbacks()

    ###############################################################################
    # save a previous model for next time
    if self.controller.retrain_best is None:
        log.warning("No retraining network")
        return

    log.info("Saving retraining network with val_policy_acc: %.4f" % (
        self.controller.retrain_best_val_policy_acc))

    # there is an undocumented keras clone function, but this is sure to work (albeit slow and evil)
    from ggpzero.util.keras import keras_models

    for_next_generation = "%s_prev" % self.next_generation

    # clone architecture via json round-trip, then restore the snapshot weights
    prev_model = keras_models.model_from_json(self.nn.keras_model.to_json())
    prev_model.set_weights(self.controller.retrain_best)

    prev_generation_descr = attrutil.clone(self.nn.generation_descr)
    prev_generation_descr.name = for_next_generation
    prev_nn = network.NeuralNetwork(self.nn.gdl_bases_transformer, prev_model,
                                    prev_generation_descr)
    man.save_network(prev_nn, for_next_generation)

    self.do_callbacks()
def debug(self):
    """Log shapes plus a few example rows of the training inputs/outputs."""
    # good to see some outputs
    for sample_idx in (10, 420, 42):
        log.info('train input, shape: %s.  Example: %s' % (self.inputs.shape,
                                                           self.inputs[sample_idx]))
        for output_array in self.outputs:
            log.info('train output, shape: %s.  Example: %s' % (output_array.shape,
                                                                output_array[sample_idx]))
def on_meta_gaming(self, finish_time):
    """Duplicate the match statemachine, create the proxied player, sync state,
    and delegate meta gaming to the proxy."""
    log.info("%s meta Gaming: match: %s" % (self.name, self.match.match_id))

    # work on our own copy of the statemachine
    self.sm = self.match.sm.dupe()
    self.proxy = self.meta_create_player()

    # ensure we are in the right state
    state = self.match.get_current_state()
    self.sm.update_bases(state)

    self.proxy.on_meta_gaming(finish_time)
def init_data_rxd(self, data):
    """Accumulate handshake bytes; once exactly CHALLENGE_SIZE bytes have
    arrived, either establish the logical connection (response matches) or
    reject and disconnect."""
    self.start_buf += data
    if len(self.start_buf) == self.CHALLENGE_SIZE:
        if self.expected_response == self.start_buf:
            self.logical_connection = True
            log.info("Logical connection made")
            self.broker.new_broker_client(self)
        else:
            # fix: a failed challenge must not mark the connection as up
            # (previously set True on both branches)
            self.logical_connection = False
            log.error("Logical connection failed")
            self.disconnect()
def on_meta_gaming(self, finish_time):
    """Duplicate the match statemachine and pre-allocate the move/state objects
    used by fast depth charges; clear the search root."""
    log.info("%s meta Gaming: match: %s" % (self.name, self.match.match_id))

    sm = self.match.sm.dupe()
    self.sm = sm

    # get and cache fast move and legals
    self.joint_move = sm.get_joint_move()
    self.depth_charge_joint_move = sm.get_joint_move()
    self.depth_charge_state = sm.new_base_state()
    self.role_count = len(sm.get_roles())

    # store the node so we can return info on move
    self.root = None
def play_runner(player, port):
    """Initialise k273/logging and serve *player* over HTTP on *port*.

    Blocks in the twisted reactor loop; does not return under normal operation.
    """
    interface.initialise_k273(1, log_name_base=player.get_name())
    log.initialise()

    ggp = GGPServer()
    ggp.set_player(player)

    site = server.Site(ggp)

    log.info("Running player '%s' on port %d" % (player.get_name(), port))
    reactor.listenTCP(port, site)
    reactor.run()
def main_2(game_name, seconds_to_run):
    """Benchmark rollouts for *game_name* over *seconds_to_run* and log a summary."""
    info = lookup.by_name(game_name)
    statemachine = info.get_sm()

    msecs_taken, rollouts, num_state_changes = go(statemachine, seconds_to_run)

    # convert once; reused for both summary lines
    secs_taken = msecs_taken / 1000.0

    log.info("====================================================")
    log.info("performance test game %s" % game_name)
    log.info("ran for %.3f seconds, state changes %s, rollouts %s" % (secs_taken,
                                                                      num_state_changes,
                                                                      rollouts))
    log.info("rollouts per second: %s" % (rollouts / secs_taken))
    log.info("====================================================")
def on_epoch_begin(self, epoch, logs=None):
    """Keras callback: start a fresh progress bar for the new epoch."""
    self.at_epoch += 1
    log.info('Epoch %d/%d' % (self.at_epoch, self.num_epochs))

    # oh man, keras consistency... XXX
    # newer keras reports 'steps' instead of 'samples'
    try:
        self.target = self.params['samples']
    except KeyError:
        # NOTE(review): 512 presumably is the batch size — confirm it matches
        # the fit() batch_size or the bar length will be wrong
        self.target = self.params['steps'] * 512

    self.progbar = Progbar(target=self.target)
    self.seen = 0
def on_sample_response(self, worker, msg):
    """Accumulate samples returned by a self-play worker, then free the worker
    and reschedule player assignment."""
    info = self.workers[worker]
    if msg.samples:
        self.add_new_samples(msg.samples)

        if msg.duplicates_seen:
            log.info("worker saw %s duplicates" % msg.duplicates_seen)

    log.info("len accumulated_samples: %s" % len(self.accumulated_samples))

    # worker is free for the next round of scheduling
    self.free_players.append(info)
    reactor.callLater(0, self.schedule_players)
def main(args):
    """Entry point: serve the viewer web app.

    args[0]: port to listen on; args[1]: path to the viewer files.
    Blocks in the twisted reactor loop.
    """
    port = int(args[0])
    path_to_viewer = args[1]

    interface.initialise_k273(1, log_name_base="web")
    log.initialise()

    root = WebServer(path_to_viewer)
    site = server.Site(root)

    log.info("Running WebServer on port %d" % port)
    reactor.listenTCP(port, site)
    reactor.run()
def do_play(self, move):
    """Apply the gamemaster's *move* (None on the first play), ask our player for
    its move within the time budget, validate legality, and return the move in
    gamemaster terms.

    Returns "done" when the resulting state is terminal.
    Raises CriticalError when the player's choice is not a legal move.
    """
    enter_time = time.time()
    if self.verbose:
        log.debug("do_play: %s" % (move, ))

    if move is not None:
        self.apply_move(move)

    current_state = self.get_current_state()
    if self.verbose:
        current_str = self.game_info.model.basestate_to_str(current_state)
        log.info("Current state : '%s'" % current_str)

    self.sm.update_bases(current_state)
    if self.sm.is_terminal():
        return "done"

    # time budget: full move time minus safety cushion
    end_time = enter_time + self.move_time
    if self.cushion_time > 0:
        end_time -= self.cushion_time

    legal_choice = self.player.on_next_move(end_time)

    # we have no idea what on_next_move() left the state machine.  So reverting it back to
    # correct state here.
    self.sm.update_bases(self.get_current_state())

    # get possible possible legal moves and check 'move' is a valid
    ls = self.sm.get_legal_state(self.our_role_index)

    # store last move (in our own mapping, *not* gamemaster)
    self.last_played_move = self.sm.legal_to_move(self.our_role_index, legal_choice)

    # check the move remaps and is a legal choice
    move = self.legal_to_gamemaster_move(legal_choice)
    legal_moves = [self.legal_to_gamemaster_move(ls.get_legal(ii))
                   for ii in range(ls.get_count())]
    if move not in legal_moves:
        msg = "Choice was %s not in legal choices %s" % (move, legal_moves)
        log.critical(msg)
        raise CriticalError(msg)

    if self.verbose:
        log.info("(%s) do_play '%s' sending move: %s" % (self.player.name, self.role, move))

    return move
def on_sample_response(self, worker, msg):
    """Accumulate samples from a self-play worker (dropping in-flight duplicate
    states), then free the worker and reschedule player assignment."""
    info = self.workers[worker]

    # idiom/consistency fix: truthiness instead of len(...) > 0, matching the
    # sibling handler
    if msg.samples:
        dupe_count = self.add_new_samples(msg.samples)
        if dupe_count:
            log.warning("dropping %s inflight duplicate state(s)" % dupe_count)

        if msg.duplicates_seen:
            log.info("worker saw %s duplicates" % msg.duplicates_seen)

    log.info("len accumulated_samples: %s" % len(self.accumulated_samples))

    # worker is free for the next round of scheduling
    self.free_players.append(info)
    reactor.callLater(0, self.schedule_players)