def main(values_path=None):
    """Serve TTT with a learned policy.

    When ``values_path`` is None the pickled server state is read back from
    ``server_state.pickle.gz``. Otherwise the state is rebuilt from the given
    values file with ``prepare_state`` and pickled to that same path. In both
    cases the state tuple is unpacked into the module-level globals before
    ``server.run`` is called.
    """

    global global_policy
    global global_feature_map
    global global_value_function

    state_path = "server_state.pickle.gz"

    if values_path is not None:
        # a values file was supplied: recompute the state and cache it
        state = prepare_state(values_path)

        with specmine.openz(state_path, "wb") as state_file:
            pickle.dump(state, state_file)
    else:
        # no values file: rely on the cache written by an earlier run
        logger.info("loading precomputed server state")

        with specmine.openz(state_path) as cached_file:
            state = pickle.load(cached_file)

        logger.info("done loading server state")

    global_policy, global_feature_map, global_value_function = state

    server.run(host="0.0.0.0")
def main(values_path=None):
    """Serve TTT with a learned policy.

    The server state is a ``(policy, feature_map, value_function)`` tuple. It
    is unpickled from ``server_state.pickle.gz`` when no ``values_path`` is
    given; otherwise it is produced by ``prepare_state(values_path)`` and then
    pickled to that file. The tuple is stored in module globals and the server
    is started.
    """

    global global_policy, global_feature_map, global_value_function

    state_path = "server_state.pickle.gz"
    use_cached_state = values_path is None

    if use_cached_state:
        logger.info("loading precomputed server state")

        with specmine.openz(state_path) as pickled_state:
            state = pickle.load(pickled_state)

        logger.info("done loading server state")
    else:
        state = prepare_state(values_path)

        with specmine.openz(state_path, "wb") as pickled_state:
            pickle.dump(state, pickled_state)

    (policy, feature_map, value_function) = state

    global_policy = policy
    global_feature_map = feature_map
    global_value_function = value_function

    server.run(host="0.0.0.0")
def get_template_map(m, n, B=numpy.inf, symmetric=True):
    """Return an m-by-n template feature map, using the on-disk copy if present.

    The full map is unpickled from ``specmine/data/feature_maps/`` when a file
    for these dimensions exists; otherwise ``TemplateFeatureMap(m, n)`` is
    constructed and pickled to that path for next time.

    If ``B`` equals ``numpy.inf`` the full map is returned; for any other
    value the result of ``full_feature_map.gen_map(B)`` is returned.
    """
    # NOTE(review): `symmetric` only selects the file name here; it is not
    # passed to TemplateFeatureMap -- confirm that is intended.
    if symmetric:
        path_template = 'specmine/data/feature_maps/template_feature_map.{a}x{b}.symmetric.pickle.gz'
    else:
        path_template = 'specmine/data/feature_maps/template_feature_map.{a}x{b}.pickle.gz'

    path = path_template.format(a=m, b=n)

    if not os.path.isfile(path):
        # nothing cached yet: build the complete map and store it
        logger.info("generating complete %i by %i feature map", m, n)

        full_feature_map = specmine.feature_maps.TemplateFeatureMap(m, n)

        logger.info('saving computed feature map: %s', path)

        with specmine.util.openz(path, "wb") as out_file:
            pickle.dump(full_feature_map, out_file)
    else:
        logger.info("using precomputed features at %s", path)

        with specmine.openz(path) as featuremap_file:
            full_feature_map = pickle.load(featuremap_file)

    if B == numpy.inf:
        return full_feature_map

    return full_feature_map.gen_map(B)
def prepare_state(values_path):
    """Build the (policy, feature_map, value_function) tuple from a values file.

    The pickled state values are loaded from ``values_path``, a tabular
    feature map is built from a 200-column Laplacian basis of the gameplay
    graph, and a ridge regression from state features to state values
    provides the weights of a linear value function. The returned policy is a
    ``StateValueFunctionPolicy`` over that value function.
    """

    num_features = 200

    with specmine.openz(values_path) as values_file:
        values = pickle.load(values_file)

    logger.info("converting states to their vector representation")

    states_adict = specmine.tictac.load_adjacency_dict()
    gameplay_NN, gameplay_index = specmine.discovery.adjacency_dict_to_matrix(states_adict)
    basis_NB = specmine.spectral.laplacian_basis(gameplay_NN, num_features)
    feature_map = specmine.discovery.TabularFeatureMap(basis_NB, gameplay_index)

    # player 1 against a uniformly random player -1
    opponent_policy = specmine.rl.RandomPolicy(specmine.rl.TicTacToeDomain(player=-1))
    domain = specmine.rl.TicTacToeDomain(player=1, opponent=opponent_policy)

    # regression inputs and targets, one entry per state in the values table
    states = list(values)
    feature_rows = [feature_map[state] for state in states]
    target_values = [values[state] for state in states]
    state_features = numpy.array(feature_rows)
    state_values = numpy.array(target_values)

    logger.info("fitting value function predictor")

    ridge = sklearn.linear_model.Ridge(alpha=1.0)
    ridge.fit(state_features, state_values)

    value_function = specmine.rl.LinearValueFunction(feature_map, ridge.coef_)
    policy = specmine.rl.StateValueFunctionPolicy(domain, value_function)

    return (policy, feature_map, value_function)
def main(out_path, values_path, neighbors=8, workers=0):
    """Run TTT state-clustering experiment(s).

    Loads the pickled state values, builds both an affinity graph over raw
    state features and the gameplay adjacency matrix, and then generates jobs
    for each feature count in ``numpy.r_[0:400:64j]``: one random-features job
    and one graph-features job per graph. The jobs are handed to
    ``condor.do_or_distribute`` together with a callback that writes each
    result it receives as a row of the CSV file at ``out_path``.
    """

    # load the value function
    with specmine.openz(values_path) as values_file:
        values = pickle.load(values_file)

    # map every state to a row number and to its raw feature vector
    states_adict = specmine.tictac.load_adjacency_dict()
    states = list(states_adict)

    logger.info("converting states to their vector representation")

    affinity_index = dict((state, row) for (row, state) in enumerate(states))
    vectors_ND = numpy.array([raw_state_features(state) for state in states])

    # the two graphs being compared
    affinity_NN = specmine.discovery.affinity_graph(vectors_ND, neighbors)
    (gameplay_NN, gameplay_index) = specmine.discovery.adjacency_dict_to_matrix(states_adict)

    def yield_jobs():
        graphs = [
            ("gameplay", gameplay_NN, gameplay_index),
            ("affinity", affinity_NN, affinity_index),
            ]

        for B in numpy.r_[0:400:64j].astype(int):
            yield (run_random_features, [B, vectors_ND, affinity_index, values])

            for (graph_name, graph_NN, graph_index) in graphs:
                yield (run_graph_features, [graph_name, B, vectors_ND, graph_NN, graph_index, values])

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "score_mean", "score_variance"])

        def write_result(_, row):
            return writer.writerow(row)

        condor.do_or_distribute(yield_jobs(), workers, write_result)
def main(out_path, opponent_path=None):
    """Compute the TTT value function using value iteration.

    Player 1's domain is built against an opponent policy that is either
    unpickled from ``opponent_path`` or, when no path is given, a random
    policy for player -1. The state values returned by
    ``compute_state_values_table_nondet`` are pickled to ``out_path``.
    """

    # the opponent's domain is always constructed, as in the original flow
    opponent_domain = specmine.rl.TicTacToeDomain(player=-1)

    if opponent_path is not None:
        logger.info("loading opponent policy from %s", opponent_path)

        with specmine.openz(opponent_path) as opponent_file:
            opponent_policy = pickle.load(opponent_file)
    else:
        opponent_policy = specmine.rl.RandomPolicy(opponent_domain)

    domain = specmine.rl.TicTacToeDomain(player=1, opponent=opponent_policy)

    # the deterministic variant, specmine.rl.compute_state_values_table, is
    # deliberately not used here
    values = specmine.rl.compute_state_values_table_nondet(domain)

    with specmine.openz(out_path, "wb") as out_file:
        pickle.dump(values, out_file)
def main(out_path, opponent_path=None):
    """Compute the TTT value function using value iteration.

    With no ``opponent_path`` the opponent (player -1) plays a random policy;
    otherwise the opponent policy is unpickled from that path. The resulting
    table from ``compute_state_values_table_nondet`` for player 1 is pickled
    to ``out_path``.
    """

    # set up the opponent
    random_opponent_domain = specmine.rl.TicTacToeDomain(player=-1)

    if opponent_path is None:
        opponent_policy = specmine.rl.RandomPolicy(random_opponent_domain)
    else:
        logger.info("loading opponent policy from %s", opponent_path)

        with specmine.openz(opponent_path) as policy_file:
            opponent_policy = pickle.load(policy_file)

    # evaluate every state from player 1's point of view
    # (specmine.rl.compute_state_values_table is the unused alternative)
    player_domain = specmine.rl.TicTacToeDomain(player=1, opponent=opponent_policy)
    state_values = specmine.rl.compute_state_values_table_nondet(player_domain)

    # persist the table
    with specmine.openz(out_path, "wb") as values_file:
        pickle.dump(state_values, values_file)
def main(out_path, states_path=None, render_with=None, coloring_path=None):
    """Visualize a state space graph.

    The graph is unpickled from ``states_path`` or, when that is None, taken
    from ``specmine.tictac.load_adjacency_dict()``. An optional coloring is
    unpickled from ``coloring_path`` and must have as many entries as there
    are states. Everything is then handed to
    ``specmine.graphviz.visualize_graph``.
    """

    if states_path is not None:
        with specmine.openz(states_path) as states_file:
            states = pickle.load(states_file)
    else:
        states = specmine.tictac.load_adjacency_dict()

    logger.info("writing %i-vertex graph to %s", len(states), out_path)

    coloring = None

    if coloring_path is not None:
        with specmine.util.openz(coloring_path) as pickle_file:
            coloring = pickle.load(pickle_file)

        # one color entry is expected per vertex
        assert len(coloring) == len(states)

    specmine.graphviz.visualize_graph(out_path, states, render_with, coloring)
def main(out_path, values_path, neighbors=8, workers=0):
    """Run TTT state-clustering experiment(s).

    For every feature count in ``numpy.r_[0:400:64j]`` three jobs are
    generated: random features, features from the gameplay graph, and
    features from the nearest-neighbor affinity graph. The jobs go to
    ``condor.do_or_distribute``; each result passed to the callback is written
    as a row of the CSV file at ``out_path``.
    """

    # state values to be predicted
    with specmine.openz(values_path) as values_file:
        values = pickle.load(values_file)

    # states, their row numbers, and their raw feature vectors
    states_adict = specmine.tictac.load_adjacency_dict()
    states = list(states_adict)

    logger.info("converting states to their vector representation")

    state_count = len(states)
    affinity_index = dict(zip(states, xrange(state_count)))
    raw_vectors = [raw_state_features(state) for state in states]
    vectors_ND = numpy.array(raw_vectors)

    # graph built from feature-space neighbors
    affinity_NN = specmine.discovery.affinity_graph(vectors_ND, neighbors)

    # graph built from the game's own transitions
    gameplay_NN, gameplay_index = specmine.discovery.adjacency_dict_to_matrix(states_adict)

    def yield_jobs():
        for B in numpy.r_[0:400:64j].astype(int):
            random_args = [B, vectors_ND, affinity_index, values]
            gameplay_args = ["gameplay", B, vectors_ND, gameplay_NN, gameplay_index, values]
            affinity_args = ["affinity", B, vectors_ND, affinity_NN, affinity_index, values]

            yield (run_random_features, random_args)
            yield (run_graph_features, gameplay_args)
            yield (run_graph_features, affinity_args)

    header = ["map_name", "features", "score_mean", "score_variance"]

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(header)

        condor.do_or_distribute(
            yield_jobs(),
            workers,
            lambda _, result: writer.writerow(result),
            )
def main(out_path, player=-1):
    """Compute the optimal TTT policy for the specified player.

    For every state in the domain where ``player`` is to move, the move from
    ``specmine.tictac.ab_optimal_move`` is recorded. For every other state the
    single available action (if any) is recorded; states with no actions are
    logged as terminal and left out of the policy. The resulting dict is
    pickled to ``out_path``.
    """

    # the domain's opponent is the other player, moving at random
    opponent_domain = specmine.rl.TicTacToeDomain(player=-1 * player)
    opponent_policy = specmine.rl.RandomPolicy(opponent_domain)
    domain = specmine.rl.TicTacToeDomain(player=player, opponent=opponent_policy)

    policy = {}

    for state in domain.states:
        (board, player_to_move) = state

        if player != player_to_move:
            actions = list(domain.actions_in(state))

            if not actions:
                move = "(Terminal)"
            else:
                # exactly one action is expected when it is not our turn
                (move,) = actions

                policy[state] = move
        else:
            (move_i, move_j, _) = specmine.tictac.ab_optimal_move(board, player)
            move = (move_i, move_j)

            policy[state] = move

        logger.info("optimal move in %s: %s", board._grid.astype(int).tolist(), move)

    with specmine.openz(out_path, "wb") as out_file:
        pickle.dump(policy, out_file)
def main(out_path, number=9):
    """Analyze eigenvectors in the TTT domain.

    Computes ``number`` Laplacian basis vectors of the gameplay graph and, for
    each of the nine boards reached by player 1's first move, writes the
    value of every basis vector at that state (with player -1 to move) to a
    CSV file at ``out_path`` with columns ``number, i, j, value``.
    """

    adict = specmine.tictac.load_adjacency_dict()
    (gameplay_NN, index) = specmine.discovery.adjacency_dict_to_matrix(adict)
    basis_NB = specmine.spectral.laplacian_basis(gameplay_NN, number)

    start = specmine.tictac.BoardState()

    def opening_rows():
        # one row per (basis vector, opening move) pair
        for i in xrange(3):
            for j in xrange(3):
                board = start.make_move(1, i, j)
                n = index[(board, -1)]

                for b in xrange(number):
                    yield [b, i, j, basis_NB[n, b]]

    # all rows are computed before the output file is opened
    rows = list(opening_rows())

    with specmine.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["number", "i", "j", "value"])
        writer.writerows(rows)
def prepare_state(values_path):
    """Fit a linear value function to stored state values and wrap it in a policy.

    Returns the tuple ``(policy, feature_map, value_function)``, where the
    feature map is a ``TabularFeatureMap`` over a 200-column Laplacian basis
    of the gameplay graph, the value function uses ridge-regression
    coefficients fit to the values unpickled from ``values_path``, and the
    policy is a ``StateValueFunctionPolicy`` for player 1.
    """

    basis_size = 200

    with specmine.openz(values_path) as pickled_values:
        values = pickle.load(pickled_values)

    logger.info("converting states to their vector representation")

    adjacency = specmine.tictac.load_adjacency_dict()
    (graph_NN, graph_index) = specmine.discovery.adjacency_dict_to_matrix(adjacency)
    feature_map = specmine.discovery.TabularFeatureMap(
        specmine.spectral.laplacian_basis(graph_NN, basis_size),
        graph_index,
        )

    # construct domain
    opponent_domain = specmine.rl.TicTacToeDomain(player=-1)
    opponent_policy = specmine.rl.RandomPolicy(opponent_domain)
    domain = specmine.rl.TicTacToeDomain(player=1, opponent=opponent_policy)

    # prepare features and targets
    states = list(values)
    state_features = numpy.array([feature_map[state] for state in states])
    state_values = numpy.array([values[state] for state in states])

    # learn a value function
    logger.info("fitting value function predictor")

    regressor = sklearn.linear_model.Ridge(alpha=1.0)
    regressor.fit(state_features, state_values)

    value_function = specmine.rl.LinearValueFunction(feature_map, regressor.coef_)
    policy = specmine.rl.StateValueFunctionPolicy(domain, value_function)

    return (policy, feature_map, value_function)
def main(out_path, player=-1):
    """Compute the optimal TTT policy for the specified player.

    Builds a dict from state to move: the ``ab_optimal_move`` result on the
    player's own turns, and the single available action on the other turns.
    States with no available actions are logged as "(Terminal)" and get no
    entry. The dict is pickled to ``out_path``.
    """

    # construct domain
    other_player = -1 * player
    opponent_policy = specmine.rl.RandomPolicy(specmine.rl.TicTacToeDomain(player=other_player))
    domain = specmine.rl.TicTacToeDomain(player=player, opponent=opponent_policy)

    # compute the optimal policy
    policy = {}

    for state in domain.states:
        (board, player_to_move) = state
        terminal = False

        if player == player_to_move:
            (row, column, _) = specmine.tictac.ab_optimal_move(board, player)
            move = (row, column)
        else:
            available = list(domain.actions_in(state))
            terminal = not available

            if terminal:
                move = "(Terminal)"
            else:
                (move,) = available

        # terminal states are reported below but never stored
        if not terminal:
            policy[state] = move

        logger.info("optimal move in %s: %s", board._grid.astype(int).tolist(), move)

    # and store it
    with specmine.openz(out_path, "wb") as policy_file:
        pickle.dump(policy, policy_file)
def get_laplacian_map(boards=None, num_samples=10000, max_eigs=500, neighbors=8,
                      affinity_map=specmine.feature_maps.flat_affinity_map,
                      eig_solver="arpack"):
    """Return a Laplacian-eigenvector feature map, reusing a cached one if possible.

    Cached maps live in ``specmine/data/feature_maps/`` under names of the
    form ``laplacian.ngs=<num_samples>.nan=<neighbors>.aff=<tag>.<solver>
    .nf=<count>.pickle.gz``. The first cached file whose settings match and
    whose feature count is at least ``max_eigs`` is unpickled and returned
    as-is. If none can be loaded, a new map with ``max_eigs`` eigenvectors is
    built from ``boards`` and saved on a best-effort basis.

    Parameters:
        boards: iterable of boards to build the affinity graph from; only
            needed when no usable cached map exists.
        num_samples: used only as part of the cache file name.
        max_eigs: minimum number of eigenvectors required / number to compute.
        neighbors: neighbor count passed to ``build_affinity_graph``.
        affinity_map: either ``flat_affinity_map`` or a ``TemplateFeatureMap``
            instance (whose ``__getitem__`` is then used); anything else is
            tagged ``?`` in the file name and called directly.
        eig_solver: passed as ``method`` to ``laplacian_basis``.

    Raises:
        TypeError: if a map has to be generated but ``boards`` is None.
    """
    curr_dir = 'specmine/data/feature_maps/'

    # os.walk yields nothing for a missing directory; treat that as "no cache"
    (_, _, files) = next(os.walk('./specmine/data/feature_maps/'), (None, [], []))

    if affinity_map == specmine.feature_maps.flat_affinity_map:
        aff = 'flat'
    elif isinstance(affinity_map, specmine.feature_maps.TemplateFeatureMap):
        logger.info('getting laplacian map using template features as affinity')
        aff = '2x2_sym_template'
        affinity_map = affinity_map.__getitem__
    else:
        aff = '?'

    path_front = 'laplacian.ngs={s}.nan={k}.aff={a}.{ei}'.format(
        s=int(num_samples), k=neighbors, a=aff, ei=eig_solver)
    path_end = '.pickle.gz'

    # Escape the literal parts (they contain '.' and possibly '?') and match
    # the whole file name, so only files written by this function qualify.
    cached_name = re.compile(
        re.escape(path_front) + r'\.nf=(\d+)' + re.escape(path_end) + '$')

    for f in files:
        match = cached_name.match(f)

        if match is None:
            continue

        num_feats = int(match.group(1))

        if num_feats < max_eigs:
            continue

        path = curr_dir + f

        logger.info("using precomputed features at %s", path)
        logger.info('num features used: %i , max available: %i', num_feats, max_eigs)

        try:
            with specmine.openz(path) as featuremap_file:
                return pickle.load(featuremap_file)
        except Exception:
            # an unreadable cache file is not fatal: try the next candidate
            logger.exception('error loading feature map')

    if boards is None:
        raise TypeError('boards must be given when no precomputed laplacian map can be loaded')

    # generate and save for next time
    path = curr_dir + path_front + '.nf={n}'.format(n=max_eigs) + path_end

    logger.info("generating laplacian eigenvector feature map with %i eigenvectors", max_eigs)

    avectors_ND = numpy.array([affinity_map(board) for board in boards])
    (affinity_NN, ball_tree) = specmine.feature_maps.build_affinity_graph(
        avectors_ND, neighbors, get_tree=True)
    basis_NB = specmine.spectral.laplacian_basis(affinity_NN, max_eigs, method=eig_solver)

    # NOTE(review): the interpolation map always receives flat_affinity_map,
    # even when the ball tree was built from a different affinity_map --
    # confirm this is intended.
    full_feature_map = specmine.feature_maps.InterpolationFeatureMap(
        basis_NB, specmine.feature_maps.flat_affinity_map, ball_tree)

    try:
        logger.info('saving computed laplacian feature map: %s', path)

        with specmine.util.openz(path, "wb") as out_file:
            pickle.dump(full_feature_map, out_file)
    except Exception:
        # saving is best-effort; the freshly computed map is still returned
        logger.exception('error trying to save laplacian map')

    return full_feature_map